1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "tgsi/tgsi_strings.h"
51 #include "lp_bld_tgsi_action.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_bitarit.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_init.h"
58 #include "lp_bld_logic.h"
59 #include "lp_bld_swizzle.h"
60 #include "lp_bld_flow.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_tgsi.h"
63 #include "lp_bld_limits.h"
64 #include "lp_bld_debug.h"
65 #include "lp_bld_printf.h"
66 #include "lp_bld_sample.h"
67 #include "lp_bld_struct.h"
68
69 /* SM 4.0 says that subroutines can nest 32 deep and
70 * we need one more for our main function */
71 #define LP_MAX_NUM_FUNCS 33
72
73 #define DUMP_GS_EMITS 0
74
75 /*
76 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
77 * instruction.
78 *
79 * TODO:
80 * - take execution masks into consideration
81 * - debug control-flow instructions
82 */
83 #define DEBUG_EXECUTION 0
84
85
86 /*
87 * Emit code to print a register value.
88 */
89 static void
90 emit_dump_reg(struct gallivm_state *gallivm,
91 unsigned file,
92 unsigned index,
93 unsigned chan,
94 LLVMValueRef value)
95 {
96 char buf[32];
97
98 util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
99 tgsi_file_name(file),
100 index, "xyzw"[chan]);
101
102 lp_build_print_value(gallivm, buf, value);
103 }
104
105 /*
106 * Return the context for the current function.
107 * (always 'main' if the shader doesn't make any function calls)
108 */
109 static inline struct function_ctx *
110 func_ctx(struct lp_exec_mask *mask)
111 {
112 assert(mask->function_stack_size > 0);
113 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
114 return &mask->function_stack[mask->function_stack_size - 1];
115 }
116
117 /*
118 * Returns true if we're in a loop.
119 * The check is global: it returns true even if the current function
120 * contains no loop, as long as a calling function was inside a loop
121 * when it called this one.
122 */
123 static inline boolean
124 mask_has_loop(struct lp_exec_mask *mask)
125 {
126 int i;
127 for (i = mask->function_stack_size - 1; i >= 0; --i) {
128 const struct function_ctx *ctx = &mask->function_stack[i];
129 if (ctx->loop_stack_size > 0)
130 return TRUE;
131 }
132 return FALSE;
133 }
134
135 /*
136 * Returns true if we're inside a switch statement.
137 * The check is global: it returns true even if the current function
138 * contains no switch, as long as a calling function was inside a
139 * switch when it called this one.
140 */
141 static inline boolean
142 mask_has_switch(struct lp_exec_mask *mask)
143 {
144 int i;
145 for (i = mask->function_stack_size - 1; i >= 0; --i) {
146 const struct function_ctx *ctx = &mask->function_stack[i];
147 if (ctx->switch_stack_size > 0)
148 return TRUE;
149 }
150 return FALSE;
151 }
152
153 /*
154 * Returns true if we're inside a conditional.
155 * The check is global: it returns true even if the current function
156 * contains no conditional, as long as a calling function was inside
157 * a conditional when it called this one.
158 */
159 static inline boolean
160 mask_has_cond(struct lp_exec_mask *mask)
161 {
162 int i;
163 for (i = mask->function_stack_size - 1; i >= 0; --i) {
164 const struct function_ctx *ctx = &mask->function_stack[i];
165 if (ctx->cond_stack_size > 0)
166 return TRUE;
167 }
168 return FALSE;
169 }
170
171
172 /*
173 * Initialize a function context at the specified index.
174 */
175 static void
176 lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
177 {
178 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
179 LLVMBuilderRef builder = mask->bld->gallivm->builder;
180 struct function_ctx *ctx = &mask->function_stack[function_idx];
181
182 ctx->cond_stack_size = 0;
183 ctx->loop_stack_size = 0;
184 ctx->switch_stack_size = 0;
185
186 if (function_idx == 0) {
187 ctx->ret_mask = mask->ret_mask;
188 }
189
190 ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
191 int_type, "looplimiter");
192 LLVMBuildStore(
193 builder,
194 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
195 ctx->loop_limiter);
196 }
197
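/*
 * Initialize the whole execution mask state for a shader invocation:
 * all lanes enabled, with a function context stack whose slot 0 is 'main'.
 */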
198 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
199 {
200 mask->bld = bld;
201 mask->has_mask = FALSE;
202 mask->ret_in_main = FALSE;
203 /* For the main function */
204 mask->function_stack_size = 1;
205
206 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
207 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
208 mask->cond_mask = mask->switch_mask =
209 LLVMConstAllOnes(mask->int_vec_type);
210
211 mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
212 sizeof(mask->function_stack[0]));
213 lp_exec_mask_function_init(mask, 0);
214 }
215
216 static void
217 lp_exec_mask_fini(struct lp_exec_mask *mask)
218 {
219 FREE(mask->function_stack);
220 }
221
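/*
 * Recompute exec_mask by AND'ing together the active component masks:
 * exec_mask = cond_mask & cont_mask & break_mask & switch_mask & ret_mask,
 * skipping any term that cannot currently differ from all-ones.
 */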
222 static void lp_exec_mask_update(struct lp_exec_mask *mask)
223 {
224 LLVMBuilderRef builder = mask->bld->gallivm->builder;
225 boolean has_loop_mask = mask_has_loop(mask);
226 boolean has_cond_mask = mask_has_cond(mask);
227 boolean has_switch_mask = mask_has_switch(mask);
228 boolean has_ret_mask = mask->function_stack_size > 1 ||
229 mask->ret_in_main;
230
231 if (has_loop_mask) {
232 /* for loops we need to update the entire mask at runtime */
233 LLVMValueRef tmp;
234 assert(mask->break_mask);
235 tmp = LLVMBuildAnd(builder,
236 mask->cont_mask,
237 mask->break_mask,
238 "maskcb");
239 mask->exec_mask = LLVMBuildAnd(builder,
240 mask->cond_mask,
241 tmp,
242 "maskfull");
243 } else
244 mask->exec_mask = mask->cond_mask;
245
246 if (has_switch_mask) {
247 mask->exec_mask = LLVMBuildAnd(builder,
248 mask->exec_mask,
249 mask->switch_mask,
250 "switchmask");
251 }
252
253 if (has_ret_mask) {
254 mask->exec_mask = LLVMBuildAnd(builder,
255 mask->exec_mask,
256 mask->ret_mask,
257 "callmask");
258 }
259
260 mask->has_mask = (has_cond_mask ||
261 has_loop_mask ||
262 has_switch_mask ||
263 has_ret_mask);
264 }
265
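/*
 * IF: push the current cond_mask and AND the new condition into it.
 * Nesting beyond LP_MAX_TGSI_NESTING is counted but otherwise ignored.
 */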
266 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
267 LLVMValueRef val)
268 {
269 LLVMBuilderRef builder = mask->bld->gallivm->builder;
270 struct function_ctx *ctx = func_ctx(mask);
271
272 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
273 ctx->cond_stack_size++;
274 return;
275 }
276 if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
277 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
278 }
279 ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
280 assert(LLVMTypeOf(val) == mask->int_vec_type);
281 mask->cond_mask = LLVMBuildAnd(builder,
282 mask->cond_mask,
283 val,
284 "");
285 lp_exec_mask_update(mask);
286 }
287
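/*
 * ELSE: replace cond_mask with its complement, still qualified by the
 * mask that was current when the matching IF was entered.
 */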
288 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
289 {
290 LLVMBuilderRef builder = mask->bld->gallivm->builder;
291 struct function_ctx *ctx = func_ctx(mask);
292 LLVMValueRef prev_mask;
293 LLVMValueRef inv_mask;
294
295 assert(ctx->cond_stack_size);
296 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
297 return;
298 prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
299 if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
300 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
301 }
302
303 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
304
305 mask->cond_mask = LLVMBuildAnd(builder,
306 inv_mask,
307 prev_mask, "");
308 lp_exec_mask_update(mask);
309 }
310
311 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
312 {
313 struct function_ctx *ctx = func_ctx(mask);
314 assert(ctx->cond_stack_size);
315 --ctx->cond_stack_size;
316 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
317 return;
318 mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
319 lp_exec_mask_update(mask);
320 }
321
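/*
 * BGNLOOP: save the enclosing loop's state, allocate break_var so the
 * break mask survives across iterations, and open a new basic block
 * for the loop body.
 */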
322 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
323 {
324 LLVMBuilderRef builder = mask->bld->gallivm->builder;
325 struct function_ctx *ctx = func_ctx(mask);
326
327 if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
328 ++ctx->loop_stack_size;
329 return;
330 }
331
332 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
333 ctx->break_type;
334 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
335
336 ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
337 ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
338 ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
339 ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
340 ++ctx->loop_stack_size;
341
342 ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
343 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
344
345 ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
346
347 LLVMBuildBr(builder, ctx->loop_block);
348 LLVMPositionBuilderAtEnd(builder, ctx->loop_block);
349
350 mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");
351
352 lp_exec_mask_update(mask);
353 }
354
355 static void lp_exec_break(struct lp_exec_mask *mask,
356 struct lp_build_tgsi_context * bld_base)
357 {
358 LLVMBuilderRef builder = mask->bld->gallivm->builder;
359 struct function_ctx *ctx = func_ctx(mask);
360
361 if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
362 LLVMValueRef exec_mask = LLVMBuildNot(builder,
363 mask->exec_mask,
364 "break");
365
366 mask->break_mask = LLVMBuildAnd(builder,
367 mask->break_mask,
368 exec_mask, "break_full");
369 }
370 else {
371 unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
372 boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
373 opcode == TGSI_OPCODE_CASE);
374
375
376 if (ctx->switch_in_default) {
377 /*
378 * stop default execution but only if this is an unconditional switch.
379 * (The condition here is not perfect since dead code after break is
380 * allowed but should be sufficient since false negatives are just
381 * unoptimized - so we don't have to pre-evaluate that).
382 */
383 if (break_always && ctx->switch_pc) {
384 bld_base->pc = ctx->switch_pc;
385 return;
386 }
387 }
388
389 if (break_always) {
390 mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
391 }
392 else {
393 LLVMValueRef exec_mask = LLVMBuildNot(builder,
394 mask->exec_mask,
395 "break");
396 mask->switch_mask = LLVMBuildAnd(builder,
397 mask->switch_mask,
398 exec_mask, "break_switch");
399 }
400 }
401
402 lp_exec_mask_update(mask);
403 }
404
405 static void lp_exec_break_condition(struct lp_exec_mask *mask,
406 LLVMValueRef cond)
407 {
408 LLVMBuilderRef builder = mask->bld->gallivm->builder;
409 struct function_ctx *ctx = func_ctx(mask);
410 LLVMValueRef cond_mask = LLVMBuildAnd(builder,
411 mask->exec_mask,
412 cond, "cond_mask");
413 cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
414
415 if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
416 mask->break_mask = LLVMBuildAnd(builder,
417 mask->break_mask,
418 cond_mask, "breakc_full");
419 }
420 else {
421 mask->switch_mask = LLVMBuildAnd(builder,
422 mask->switch_mask,
423 cond_mask, "breakc_switch");
424 }
425
426 lp_exec_mask_update(mask);
427 }
428
429 static void lp_exec_continue(struct lp_exec_mask *mask)
430 {
431 LLVMBuilderRef builder = mask->bld->gallivm->builder;
432 LLVMValueRef exec_mask = LLVMBuildNot(builder,
433 mask->exec_mask,
434 "");
435
436 mask->cont_mask = LLVMBuildAnd(builder,
437 mask->cont_mask,
438 exec_mask, "");
439
440 lp_exec_mask_update(mask);
441 }
442
443
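/*
 * ENDLOOP: restore the continue mask, persist the break mask through
 * break_var, and branch back to the loop header as long as any lane
 * is still active and the iteration limiter hasn't expired.
 */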
444 static void lp_exec_endloop(struct gallivm_state *gallivm,
445 struct lp_exec_mask *mask)
446 {
447 LLVMBuilderRef builder = mask->bld->gallivm->builder;
448 struct function_ctx *ctx = func_ctx(mask);
449 LLVMBasicBlockRef endloop;
450 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
451 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
452 mask->bld->type.width *
453 mask->bld->type.length);
454 LLVMValueRef i1cond, i2cond, icond, limiter;
455
456 assert(mask->break_mask);
457
458
459 assert(ctx->loop_stack_size);
460 if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
461 --ctx->loop_stack_size;
462 return;
463 }
464
465 /*
466 * Restore the cont_mask, but don't pop
467 */
468 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
469 lp_exec_mask_update(mask);
470
471 /*
472 * Unlike the continue mask, the break_mask must be preserved across loop
473 * iterations
474 */
475 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
476
477 /* Decrement the loop limiter */
478 limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");
479
480 limiter = LLVMBuildSub(
481 builder,
482 limiter,
483 LLVMConstInt(int_type, 1, false),
484 "");
485
486 LLVMBuildStore(builder, limiter, ctx->loop_limiter);
487
488 /* i1cond = (mask != 0) */
489 i1cond = LLVMBuildICmp(
490 builder,
491 LLVMIntNE,
492 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
493 LLVMConstNull(reg_type), "i1cond");
494
495 /* i2cond = (looplimiter > 0) */
496 i2cond = LLVMBuildICmp(
497 builder,
498 LLVMIntSGT,
499 limiter,
500 LLVMConstNull(int_type), "i2cond");
501
502 /* if( i1cond && i2cond ) */
503 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
504
505 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
506
507 LLVMBuildCondBr(builder,
508 icond, ctx->loop_block, endloop);
509
510 LLVMPositionBuilderAtEnd(builder, endloop);
511
512 assert(ctx->loop_stack_size);
513 --ctx->loop_stack_size;
514 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
515 mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
516 ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
517 ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
518 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
519 ctx->switch_stack_size];
520
521 lp_exec_mask_update(mask);
522 }
523
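/*
 * SWITCH: save the enclosing switch state and start with an all-zero
 * switch_mask; CASE statements enable the lanes they match.
 */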
524 static void lp_exec_switch(struct lp_exec_mask *mask,
525 LLVMValueRef switchval)
526 {
527 struct function_ctx *ctx = func_ctx(mask);
528
529 if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
530 ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
531 ctx->switch_stack_size++;
532 return;
533 }
534
535 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
536 ctx->break_type;
537 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
538
539 ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
540 ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
541 ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
542 ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
543 ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
544 ctx->switch_stack_size++;
545
546 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
547 ctx->switch_val = switchval;
548 ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
549 ctx->switch_in_default = false;
550 ctx->switch_pc = 0;
551
552 lp_exec_mask_update(mask);
553 }
554
555 static void lp_exec_endswitch(struct lp_exec_mask *mask,
556 struct lp_build_tgsi_context * bld_base)
557 {
558 LLVMBuilderRef builder = mask->bld->gallivm->builder;
559 struct function_ctx *ctx = func_ctx(mask);
560
561 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
562 ctx->switch_stack_size--;
563 return;
564 }
565
566 /* check if there's a deferred default; if so, do it now */
567 if (ctx->switch_pc && !ctx->switch_in_default) {
568 LLVMValueRef prevmask, defaultmask;
569 unsigned tmp_pc;
570 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
571 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
572 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
573 ctx->switch_in_default = true;
574
575 lp_exec_mask_update(mask);
576
577 assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
578 TGSI_OPCODE_DEFAULT);
579
580 tmp_pc = bld_base->pc;
581 bld_base->pc = ctx->switch_pc;
582 /*
583 * re-purpose switch_pc to point to here again, since we stop execution of
584 * the deferred default after next break.
585 */
586 ctx->switch_pc = tmp_pc - 1;
587
588 return;
589 }
590
591 else if (ctx->switch_pc && ctx->switch_in_default) {
592 assert(bld_base->pc == ctx->switch_pc + 1);
593 }
594
595 ctx->switch_stack_size--;
596 mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
597 ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
598 ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
599 ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
600 ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
601
602 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
603
604 lp_exec_mask_update(mask);
605 }
606
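/*
 * CASE: enable the lanes where caseval equals the switch value (plus any
 * lanes already running, to model fallthrough); switch_mask_default
 * accumulates every lane matched by some case so DEFAULT can use the
 * complement.
 */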
607 static void lp_exec_case(struct lp_exec_mask *mask,
608 LLVMValueRef caseval)
609 {
610 LLVMBuilderRef builder = mask->bld->gallivm->builder;
611 struct function_ctx *ctx = func_ctx(mask);
612
613 LLVMValueRef casemask, prevmask;
614
615 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
616 return;
617 }
618
619 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
620 if (!ctx->switch_in_default) {
621 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
622 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
623 ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
624 ctx->switch_mask_default, "sw_default_mask");
625 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
626 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
627
628 lp_exec_mask_update(mask);
629 }
630 }
631
632 /*
633 * Analyse default statement in a switch.
634 * \return true if default is last statement, false otherwise
635 * \param default_pc_start contains pc of instruction to jump to
636 * if default wasn't last but there's no
637 * fallthrough into default.
638 */
639 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
640 struct lp_build_tgsi_context * bld_base,
641 int *default_pc_start)
642 {
643 unsigned pc = bld_base->pc;
644 struct function_ctx *ctx = func_ctx(mask);
645 int curr_switch_stack = ctx->switch_stack_size;
646
647 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
648 return false;
649 }
650
651 /* skip over case statements which are together with default */
652 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
653 pc++;
654 }
655
656 while (pc != ~0u && pc < bld_base->num_instructions) {
657 unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
658 switch (opcode) {
659 case TGSI_OPCODE_CASE:
660 if (curr_switch_stack == ctx->switch_stack_size) {
661 *default_pc_start = pc - 1;
662 return false;
663 }
664 break;
665 case TGSI_OPCODE_SWITCH:
666 curr_switch_stack++;
667 break;
668 case TGSI_OPCODE_ENDSWITCH:
669 if (curr_switch_stack == ctx->switch_stack_size) {
670 *default_pc_start = pc - 1;
671 return true;
672 }
673 curr_switch_stack--;
674 break;
675 }
676 pc++;
677 }
678 /* should never arrive here */
679 assert(0);
680 return true;
681 }
682
683 static void lp_exec_default(struct lp_exec_mask *mask,
684 struct lp_build_tgsi_context * bld_base)
685 {
686 LLVMBuilderRef builder = mask->bld->gallivm->builder;
687 struct function_ctx *ctx = func_ctx(mask);
688
689 int default_exec_pc;
690 boolean default_is_last;
691
692 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
693 return;
694 }
695
696 /*
697 * This is a messy opcode, because it may not always be at the end and
698 * there can be fallthrough in and out of it.
699 */
700
701 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
702 /*
703 * If it is the last statement in the switch (case statements appearing
704 * "at the same time" as default don't change that), everything is fine:
705 * update the switch mask and go on. This means we can handle default
706 * with fallthrough INTO it without overhead, if it is last.
707 */
708 if (default_is_last) {
709 LLVMValueRef prevmask, defaultmask;
710 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
711 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
712 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
713 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
714 ctx->switch_in_default = true;
715
716 lp_exec_mask_update(mask);
717 }
718 else {
719 /*
720 * Technically, a "case" immediately before default isn't really a
721 * fallthrough, however we still have to count it as such since we
722 * have already updated the masks.
723 * If that happens in practice we could add a switch optimizer pass
724 * which just gets rid of all case statements appearing together with
725 * default (or do the switch analysis at switch start time instead).
726 */
727 unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
728 boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
729 opcode != TGSI_OPCODE_SWITCH);
730 /*
731 * If it is not last statement and there was no fallthrough into it,
732 * we record the PC and continue execution at next case (again, those
733 * case encountered at the same time don't count). At endswitch
734 * time, we update switchmask, and go back executing the code we skipped
735 * until the next break (possibly re-executing some code with changed mask
736 * if there was a fallthrough out of default).
737 * Finally, if it is not last statement and there was a fallthrough into it,
738 * do the same as with the former case, except instead of skipping the code
739 * just execute it without updating the mask, then go back and re-execute.
740 */
741 ctx->switch_pc = bld_base->pc;
742 if (!ft_into) {
743 bld_base->pc = default_exec_pc;
744 }
745 }
746 }
747
748
749 /* Stores val into the address pointed to by dst_ptr.
750 * mask->exec_mask is used to figure out which elements of val
751 * should be stored into the address
752 * (a lane of 0 means don't store that element, ~0 means do store it).
753 */
754 static void lp_exec_mask_store(struct lp_exec_mask *mask,
755 struct lp_build_context *bld_store,
756 LLVMValueRef val,
757 LLVMValueRef dst_ptr)
758 {
759 LLVMBuilderRef builder = mask->bld->gallivm->builder;
760 LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;
761
762 assert(lp_check_value(bld_store->type, val));
763 assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
764 assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
765
766 if (exec_mask) {
767 LLVMValueRef res, dst;
768
769 dst = LLVMBuildLoad(builder, dst_ptr, "");
770 res = lp_build_select(bld_store, exec_mask, val, dst);
771 LLVMBuildStore(builder, res, dst_ptr);
772 } else
773 LLVMBuildStore(builder, val, dst_ptr);
774 }
775
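/*
 * CAL: push the return pc and current ret_mask, set up a fresh function
 * context, and transfer control to the subroutine. Calls beyond
 * LP_MAX_NUM_FUNCS deep are silently ignored.
 */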
776 static void lp_exec_mask_call(struct lp_exec_mask *mask,
777 int func,
778 int *pc)
779 {
780 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
781 return;
782 }
783
784 lp_exec_mask_function_init(mask, mask->function_stack_size);
785 mask->function_stack[mask->function_stack_size].pc = *pc;
786 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
787 mask->function_stack_size++;
788 *pc = func;
789 }
790
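/*
 * RET: disable the returning lanes in ret_mask. A return from the top
 * level of main with no control flow active simply ends code generation.
 */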
791 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
792 {
793 LLVMBuilderRef builder = mask->bld->gallivm->builder;
794 struct function_ctx *ctx = func_ctx(mask);
795 LLVMValueRef exec_mask;
796
797 if (ctx->cond_stack_size == 0 &&
798 ctx->loop_stack_size == 0 &&
799 ctx->switch_stack_size == 0 &&
800 mask->function_stack_size == 1) {
801 /* returning from main() */
802 *pc = -1;
803 return;
804 }
805
806 if (mask->function_stack_size == 1) {
807 /*
808 * This requires special handling since we need to ensure
809 * we don't drop the mask even if we have no call stack
810 * (e.g. after a ret in a if clause after the endif)
811 */
812 mask->ret_in_main = TRUE;
813 }
814
815 exec_mask = LLVMBuildNot(builder,
816 mask->exec_mask,
817 "ret");
818
819 mask->ret_mask = LLVMBuildAnd(builder,
820 mask->ret_mask,
821 exec_mask, "ret_full");
822
823 lp_exec_mask_update(mask);
824 }
825
826 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
827 {
828 }
829
830 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
831 {
832 struct function_ctx *ctx;
833
834 assert(mask->function_stack_size > 1);
835 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
836
837 ctx = func_ctx(mask);
838 mask->function_stack_size--;
839
840 *pc = ctx->pc;
841 mask->ret_mask = ctx->ret_mask;
842
843 lp_exec_mask_update(mask);
844 }
845
846
847 static LLVMValueRef
848 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
849 unsigned file,
850 int index,
851 unsigned chan)
852 {
853 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
854 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
855 LLVMValueRef var_of_array;
856
857 switch (file) {
858 case TGSI_FILE_TEMPORARY:
859 array_of_vars = bld->temps;
860 var_of_array = bld->temps_array;
861 break;
862 case TGSI_FILE_OUTPUT:
863 array_of_vars = bld->outputs;
864 var_of_array = bld->outputs_array;
865 break;
866 default:
867 assert(0);
868 return NULL;
869 }
870
871 assert(chan < 4);
872
873 if (bld->indirect_files & (1 << file)) {
874 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
875 return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
876 }
877 else {
878 assert(index <= bld->bld_base.info->file_max[file]);
879 return array_of_vars[index][chan];
880 }
881 }
882
883
884 /**
885 * Return pointer to a temporary register channel (src or dest).
886 * Note that indirect addressing cannot be handled here.
887 * \param index which temporary register
888 * \param chan which channel of the temp register.
889 */
890 LLVMValueRef
891 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
892 unsigned index,
893 unsigned chan)
894 {
895 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
896 }
897
898 /**
899 * Return pointer to an output register channel (src or dest).
900 * Note that indirect addressing cannot be handled here.
901 * \param index which output register
902 * \param chan which channel of the output register.
903 */
904 LLVMValueRef
905 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
906 unsigned index,
907 unsigned chan)
908 {
909 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
910 }
911
912 /*
913 * If we have indirect addressing in the outputs, copy our alloca array
914 * to the output slots specified by the caller to make sure
915 * our outputs are delivered consistently via the same interface.
916 */
917 static void
918 gather_outputs(struct lp_build_tgsi_soa_context * bld)
919 {
920 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
921 unsigned index, chan;
922 assert(bld->bld_base.info->num_outputs <=
923 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
924 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
925 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
926 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
927 }
928 }
929 }
930 }
931
932 /**
933 * Gather vector.
934 * XXX the lp_build_gather() function should be capable of doing this
935 * with a little work.
936 */
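/*
 * When indexes2 is non-NULL the result has 2 * type.length elements:
 * even elements are fetched via indexes, odd elements via indexes2
 * (used to gather the two halves of 64-bit values).
 */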
937 static LLVMValueRef
938 build_gather(struct lp_build_tgsi_context *bld_base,
939 LLVMValueRef base_ptr,
940 LLVMValueRef indexes,
941 LLVMValueRef overflow_mask,
942 LLVMValueRef indexes2)
943 {
944 struct gallivm_state *gallivm = bld_base->base.gallivm;
945 LLVMBuilderRef builder = gallivm->builder;
946 struct lp_build_context *uint_bld = &bld_base->uint_bld;
947 struct lp_build_context *bld = &bld_base->base;
948 LLVMValueRef res;
949 unsigned i;
950
951 if (indexes2)
952 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
953 else
954 res = bld->undef;
955 /*
956 * overflow_mask is a vector telling us which channels
957 * in the vector overflowed. We use the overflow behavior for
958 * constant buffers which is defined as:
959 * Out of bounds access to constant buffer returns 0 in all
960 * components. Out of bounds behavior is always with respect
961 * to the size of the buffer bound at that slot.
962 */
963
964 if (overflow_mask) {
965 /*
966 * We avoid per-element control flow here (also due to llvm going crazy,
967 * though I suspect it's better anyway since overflow is likely rare).
968 * Note that since we still fetch from buffers even if num_elements was
969 * zero (in this case we'll fetch from index zero) the jit func callers
970 * MUST provide valid fake constant buffers of size 4x32 (the values do
971 * not matter), otherwise we'd still need (not per element though)
972 * control flow.
973 */
974 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
975 if (indexes2)
976 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
977 }
978
979 /*
980 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
981 */
982 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
983 LLVMValueRef si, di;
984 LLVMValueRef index;
985 LLVMValueRef scalar_ptr, scalar;
986
987 di = lp_build_const_int32(bld->gallivm, i);
988 if (indexes2)
989 si = lp_build_const_int32(bld->gallivm, i >> 1);
990 else
991 si = di;
992
993 if (indexes2 && (i & 1)) {
994 index = LLVMBuildExtractElement(builder,
995 indexes2, si, "");
996 } else {
997 index = LLVMBuildExtractElement(builder,
998 indexes, si, "");
999 }
1000 scalar_ptr = LLVMBuildGEP(builder, base_ptr,
1001 &index, 1, "gather_ptr");
1002 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1003
1004 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
1005 }
1006
1007 if (overflow_mask) {
1008 if (indexes2) {
1009 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
1010 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
1011 bld_base->dbl_bld.int_vec_type, "");
1012 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
1013 bld_base->dbl_bld.zero, res);
1014 } else
1015 res = lp_build_select(bld, overflow_mask, bld->zero, res);
1016 }
1017
1018 return res;
1019 }
1020
1021
1022 /**
1023 * Scatter/store vector.
1024 */
1025 static void
1026 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
1027 LLVMValueRef base_ptr,
1028 LLVMValueRef indexes,
1029 LLVMValueRef values,
1030 struct lp_exec_mask *mask)
1031 {
1032 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1033 LLVMBuilderRef builder = gallivm->builder;
1034 unsigned i;
1035 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
1036
1037 /*
1038 * Loop over elements of index_vec, store scalar value.
1039 */
1040 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1041 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1042 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
1043 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
1044 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
1045 LLVMValueRef scalar_pred = pred ?
1046 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
1047
1048 if (0)
1049 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
1050 ii, val, index, scalar_ptr);
1051
1052 if (scalar_pred) {
1053 LLVMValueRef real_val, dst_val;
1054 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
1055 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
1056 LLVMBuildStore(builder, real_val, scalar_ptr);
1057 }
1058 else {
1059 LLVMBuildStore(builder, val, scalar_ptr);
1060 }
1061 }
1062 }
1063
1064
1065 /**
1066 * Read the current value of the ADDR register, convert the floats to
1067 * ints, add the base index and return the vector of offsets.
1068 * The offsets will be used to index into the constant buffer or
1069 * temporary register file.
1070 */
1071 static LLVMValueRef
1072 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
1073 unsigned reg_file, unsigned reg_index,
1074 const struct tgsi_ind_register *indirect_reg)
1075 {
1076 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1077 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
1078 /* use the component of the address register selected by the swizzle */
1079 unsigned swizzle = indirect_reg->Swizzle;
1080 LLVMValueRef base;
1081 LLVMValueRef rel;
1082 LLVMValueRef max_index;
1083 LLVMValueRef index;
1084
1085 assert(bld->indirect_files & (1 << reg_file));
1086
1087 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
1088
1089 assert(swizzle < 4);
1090 switch (indirect_reg->File) {
1091 case TGSI_FILE_ADDRESS:
1092 rel = LLVMBuildLoad(builder,
1093 bld->addr[indirect_reg->Index][swizzle],
1094 "load addr reg");
1095 /* ADDR LLVM values already have LLVM integer type. */
1096 break;
1097 case TGSI_FILE_TEMPORARY:
1098 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
1099 rel = LLVMBuildLoad(builder, rel, "load temp reg");
1100 /* TEMP LLVM values always have LLVM float type, but for indirection, the
1101 * value actually stored is expected to be an integer */
1102 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
1103 break;
1104 default:
1105 assert(0);
1106 rel = uint_bld->zero;
1107 }
1108
1109 index = lp_build_add(uint_bld, base, rel);
1110
1111 /*
1112 * emit_fetch_constant handles constant buffer overflow so this code
1113 * is pointless for them.
1114 * Furthermore the D3D10 spec in section 6.5 says:
1115 * If the constant buffer bound to a slot is larger than the size
1116 * declared in the shader for that slot, implementations are allowed
1117 * to return incorrect data (not necessarily 0) for indices that are
1118 * larger than the declared size but smaller than the buffer size.
1119 */
1120 if (reg_file != TGSI_FILE_CONSTANT) {
1121 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
1122 uint_bld->type,
1123 bld->bld_base.info->file_max[reg_file]);
1124
1125 assert(!uint_bld->type.sign);
1126 index = lp_build_min(uint_bld, index, max_index);
1127 }
1128
1129 return index;
1130 }
1131
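/*
 * Map a TGSI value type to the build context used for values of that
 * type (float, signed/unsigned int, double, 64-bit int).
 */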
1132 static struct lp_build_context *
1133 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1134 enum tgsi_opcode_type stype)
1135 {
1136 struct lp_build_context *bld_fetch;
1137
1138 switch (stype) {
1139 case TGSI_TYPE_FLOAT:
1140 case TGSI_TYPE_UNTYPED:
1141 bld_fetch = &bld_base->base;
1142 break;
1143 case TGSI_TYPE_UNSIGNED:
1144 bld_fetch = &bld_base->uint_bld;
1145 break;
1146 case TGSI_TYPE_SIGNED:
1147 bld_fetch = &bld_base->int_bld;
1148 break;
1149 case TGSI_TYPE_DOUBLE:
1150 bld_fetch = &bld_base->dbl_bld;
1151 break;
1152 case TGSI_TYPE_UNSIGNED64:
1153 bld_fetch = &bld_base->uint64_bld;
1154 break;
1155 case TGSI_TYPE_SIGNED64:
1156 bld_fetch = &bld_base->int64_bld;
1157 break;
1158 case TGSI_TYPE_VOID:
1159 default:
1160 assert(0);
1161 bld_fetch = NULL;
1162 break;
1163 }
1164 return bld_fetch;
1165 }
1166
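/*
 * Compute per-lane offsets into an SoA register array laid out as
 * reg0.x[length], reg0.y[length], ..., reg1.x[length], ...
 * For example, with length == 4, indirect_index == {2,2,2,2},
 * chan_index == 1 and need_perelement_offset true, this yields
 * (2*4 + 1) * 4 + {0,1,2,3} = {36,37,38,39}.
 */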
1167 static LLVMValueRef
1168 get_soa_array_offsets(struct lp_build_context *uint_bld,
1169 LLVMValueRef indirect_index,
1170 unsigned chan_index,
1171 boolean need_perelement_offset)
1172 {
1173 struct gallivm_state *gallivm = uint_bld->gallivm;
1174 LLVMValueRef chan_vec =
1175 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1176 LLVMValueRef length_vec =
1177 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1178 LLVMValueRef index_vec;
1179
1180 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1181 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1182 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1183 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1184
1185 if (need_perelement_offset) {
1186 LLVMValueRef pixel_offsets;
1187 unsigned i;
1188 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1189 pixel_offsets = uint_bld->undef;
1190 for (i = 0; i < uint_bld->type.length; i++) {
1191 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1192 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1193 ii, ii, "");
1194 }
1195 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1196 }
1197 return index_vec;
1198 }
1199
1200 static LLVMValueRef
1201 emit_fetch_constant(
1202 struct lp_build_tgsi_context * bld_base,
1203 const struct tgsi_full_src_register * reg,
1204 enum tgsi_opcode_type stype,
1205 unsigned swizzle)
1206 {
1207 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1208 struct gallivm_state *gallivm = bld_base->base.gallivm;
1209 LLVMBuilderRef builder = gallivm->builder;
1210 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1211 unsigned dimension = 0;
1212 LLVMValueRef consts_ptr;
1213 LLVMValueRef num_consts;
1214 LLVMValueRef res;
1215
1216 /* XXX: Handle fetching xyzw components as a vector */
1217 assert(swizzle != ~0u);
1218
1219 if (reg->Register.Dimension) {
1220 assert(!reg->Dimension.Indirect);
1221 dimension = reg->Dimension.Index;
1222 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
1223 }
1224
1225 consts_ptr = bld->consts[dimension];
1226 num_consts = bld->consts_sizes[dimension];
1227
1228 if (reg->Register.Indirect) {
1229 LLVMValueRef indirect_index;
1230 LLVMValueRef swizzle_vec =
1231 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1232 LLVMValueRef index_vec; /* index into the const buffer */
1233 LLVMValueRef overflow_mask;
1234 LLVMValueRef index_vec2 = NULL;
1235
1236 indirect_index = get_indirect_index(bld,
1237 reg->Register.File,
1238 reg->Register.Index,
1239 &reg->Indirect);
1240
1241 /* All fetches are from the same constant buffer, so
1242 * we need to propagate the size to a vector to do a
1243 * vector comparison */
1244 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1245 /* Construct a boolean vector telling us which channels
1246 * overflow the bound constant buffer */
1247 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1248 indirect_index, num_consts);
1249
1250 /* index_vec = indirect_index * 4 + swizzle */
1251 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1252 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1253
1254 if (tgsi_type_is_64bit(stype)) {
1255 LLVMValueRef swizzle_vec2;
1256 swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
1257 index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
1258 index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
1259 }
1260 /* Gather values from the constant buffer */
1261 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
1262 }
1263 else {
1264 LLVMValueRef index; /* index into the const buffer */
1265 LLVMValueRef scalar, scalar_ptr;
1266 struct lp_build_context *bld_broad = &bld_base->base;
1267 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1268
1269 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
1270 &index, 1, "");
1271 if (stype == TGSI_TYPE_DOUBLE) {
1272 LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
1273 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
1274 bld_broad = &bld_base->dbl_bld;
1275 } else if (stype == TGSI_TYPE_UNSIGNED64) {
1276 LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1277 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
1278 bld_broad = &bld_base->uint64_bld;
1279 } else if (stype == TGSI_TYPE_SIGNED64) {
1280 LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
1281 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
1282 bld_broad = &bld_base->int64_bld;
1283 }
1284 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1285 res = lp_build_broadcast_scalar(bld_broad, scalar);
1286 }
1287
1288 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
1289 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1290 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1291 }
1292
1293 return res;
1294 }
1295
1296 /**
1297 * Fetch 64-bit values from two separate channels.
1298 * 64-bit values are stored split across two channels, like xy and zw.
1299 * This function interleaves the two source vectors into a single
1300 * vector of 2 * length floats (e.g. 16 floats for length 8), then
1301 * bitcasts it to length 64-bit values.
1302 */
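/*
 * E.g. for length == 4 the shuffle mask is {0, 4, 1, 5, 2, 6, 3, 7},
 * interleaving input[i] and input2[i] for each element i.
 */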
1303 static LLVMValueRef
1304 emit_fetch_64bit(
1305 struct lp_build_tgsi_context * bld_base,
1306 enum tgsi_opcode_type stype,
1307 LLVMValueRef input,
1308 LLVMValueRef input2)
1309 {
1310 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1311 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1312 LLVMBuilderRef builder = gallivm->builder;
1313 LLVMValueRef res;
1314 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1315 int i;
1316 LLVMValueRef shuffles[16];
1317 int len = bld_base->base.type.length * 2;
1318 assert(len <= 16);
1319
1320 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
1321 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
1322 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
1323 }
1324 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
1325
1326 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1327 }
1328
1329 static LLVMValueRef
1330 emit_fetch_immediate(
1331 struct lp_build_tgsi_context * bld_base,
1332 const struct tgsi_full_src_register * reg,
1333 enum tgsi_opcode_type stype,
1334 unsigned swizzle)
1335 {
1336 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1337 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1338 LLVMBuilderRef builder = gallivm->builder;
1339 LLVMValueRef res = NULL;
1340
1341 if (bld->use_immediates_array || reg->Register.Indirect) {
1342 LLVMValueRef imms_array;
1343 LLVMTypeRef fptr_type;
1344
1345 /* cast imms_array pointer to float* */
1346 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1347 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
1348
1349 if (reg->Register.Indirect) {
1350 LLVMValueRef indirect_index;
1351 LLVMValueRef index_vec; /* index into the immediate register array */
1352 LLVMValueRef index_vec2 = NULL;
1353 indirect_index = get_indirect_index(bld,
1354 reg->Register.File,
1355 reg->Register.Index,
1356 &reg->Indirect);
1357 /*
1358 * Unlike for other reg classes, adding pixel offsets is unnecessary -
1359 * immediates are stored as full vectors (FIXME??? - might be better
1360 * to store them the same as constants) but all elements are the same
1361 * in any case.
1362 */
1363 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1364 indirect_index,
1365 swizzle,
1366 FALSE);
1367 if (tgsi_type_is_64bit(stype))
1368 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1369 indirect_index,
1370 swizzle + 1,
1371 FALSE);
1372 /* Gather values from the immediate register array */
1373 res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1374 } else {
1375 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1376 reg->Register.Index * 4 + swizzle);
1377 LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1378 bld->imms_array, &lindex, 1, "");
1379 res = LLVMBuildLoad(builder, imms_ptr, "");
1380
1381 if (tgsi_type_is_64bit(stype)) {
1382 LLVMValueRef lindex1;
1383 LLVMValueRef imms_ptr2;
1384 LLVMValueRef res2;
1385
1386 lindex1 = lp_build_const_int32(gallivm,
1387 reg->Register.Index * 4 + swizzle + 1);
1388 imms_ptr2 = LLVMBuildGEP(builder,
1389 bld->imms_array, &lindex1, 1, "");
1390 res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1391 res = emit_fetch_64bit(bld_base, stype, res, res2);
1392 }
1393 }
1394 }
1395 else {
1396 res = bld->immediates[reg->Register.Index][swizzle];
1397 if (tgsi_type_is_64bit(stype))
1398 res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
1399 }
1400
1401 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1402 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1403 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1404 }
1405 return res;
1406 }
1407
1408 static LLVMValueRef
1409 emit_fetch_input(
1410 struct lp_build_tgsi_context * bld_base,
1411 const struct tgsi_full_src_register * reg,
1412 enum tgsi_opcode_type stype,
1413 unsigned swizzle)
1414 {
1415 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1416 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1417 LLVMBuilderRef builder = gallivm->builder;
1418 LLVMValueRef res;
1419
1420 if (reg->Register.Indirect) {
1421 LLVMValueRef indirect_index;
1422 LLVMValueRef index_vec; /* index into the input reg array */
1423 LLVMValueRef index_vec2 = NULL;
1424 LLVMValueRef inputs_array;
1425 LLVMTypeRef fptr_type;
1426
1427 indirect_index = get_indirect_index(bld,
1428 reg->Register.File,
1429 reg->Register.Index,
1430 &reg->Indirect);
1431
1432 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1433 indirect_index,
1434 swizzle,
1435 TRUE);
1436 if (tgsi_type_is_64bit(stype)) {
1437 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1438 indirect_index,
1439 swizzle + 1,
1440 TRUE);
1441 }
1442 /* cast inputs_array pointer to float* */
1443 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1444 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1445
1446 /* Gather values from the input register array */
1447 res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1448 } else {
1449 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1450 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1451 reg->Register.Index * 4 + swizzle);
1452 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1453 bld->inputs_array, &lindex, 1, "");
1454
1455 res = LLVMBuildLoad(builder, input_ptr, "");
1456 if (tgsi_type_is_64bit(stype)) {
1457 LLVMValueRef lindex1;
1458 LLVMValueRef input_ptr2;
1459 LLVMValueRef res2;
1460
1461 lindex1 = lp_build_const_int32(gallivm,
1462 reg->Register.Index * 4 + swizzle + 1);
1463 input_ptr2 = LLVMBuildGEP(builder,
1464 bld->inputs_array, &lindex1, 1, "");
1465 res2 = LLVMBuildLoad(builder, input_ptr2, "");
1466 res = emit_fetch_64bit(bld_base, stype, res, res2);
1467 }
1468 }
1469 else {
1470 res = bld->inputs[reg->Register.Index][swizzle];
1471 if (tgsi_type_is_64bit(stype))
1472 res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
1473 }
1474 }
1475
1476 assert(res);
1477
1478 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1479 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1480 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1481 }
1482
1483 return res;
1484 }
1485
1486
1487 static LLVMValueRef
1488 emit_fetch_gs_input(
1489 struct lp_build_tgsi_context * bld_base,
1490 const struct tgsi_full_src_register * reg,
1491 enum tgsi_opcode_type stype,
1492 unsigned swizzle)
1493 {
1494 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1495 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1496 const struct tgsi_shader_info *info = bld->bld_base.info;
1497 LLVMBuilderRef builder = gallivm->builder;
1498 LLVMValueRef attrib_index = NULL;
1499 LLVMValueRef vertex_index = NULL;
1500 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1501 LLVMValueRef res;
1502
1503 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1504 /* This is really a system value, not a regular input */
1505 assert(!reg->Register.Indirect);
1506 assert(!reg->Dimension.Indirect);
1507 res = bld->system_values.prim_id;
1508 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1509 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1510 }
1511 return res;
1512 }
1513
1514 if (reg->Register.Indirect) {
1515 attrib_index = get_indirect_index(bld,
1516 reg->Register.File,
1517 reg->Register.Index,
1518 &reg->Indirect);
1519 } else {
1520 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1521 }
1522
1523 if (reg->Dimension.Indirect) {
1524 vertex_index = get_indirect_index(bld,
1525 reg->Register.File,
1526 reg->Dimension.Index,
1527 &reg->DimIndirect);
1528 } else {
1529 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1530 }
1531
1532 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1533 reg->Dimension.Indirect,
1534 vertex_index,
1535 reg->Register.Indirect,
1536 attrib_index,
1537 swizzle_index);
1538
1539 assert(res);
1540 if (tgsi_type_is_64bit(stype)) {
1541 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
1542 LLVMValueRef res2;
1543 res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1544 reg->Dimension.Indirect,
1545 vertex_index,
1546 reg->Register.Indirect,
1547 attrib_index,
1548 swizzle_index);
1549 assert(res2);
1550 res = emit_fetch_64bit(bld_base, stype, res, res2);
1551 } else if (stype == TGSI_TYPE_UNSIGNED) {
1552 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1553 } else if (stype == TGSI_TYPE_SIGNED) {
1554 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1555 }
1556
1557 return res;
1558 }
1559
1560 static LLVMValueRef
1561 emit_fetch_temporary(
1562 struct lp_build_tgsi_context * bld_base,
1563 const struct tgsi_full_src_register * reg,
1564 enum tgsi_opcode_type stype,
1565 unsigned swizzle)
1566 {
1567 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1568 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1569 LLVMBuilderRef builder = gallivm->builder;
1570 LLVMValueRef res;
1571
1572 if (reg->Register.Indirect) {
1573 LLVMValueRef indirect_index;
1574 LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
1575 LLVMValueRef temps_array;
1576 LLVMTypeRef fptr_type;
1577
1578 indirect_index = get_indirect_index(bld,
1579 reg->Register.File,
1580 reg->Register.Index,
1581 &reg->Indirect);
1582
1583 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1584 indirect_index,
1585 swizzle,
1586 TRUE);
1587 if (tgsi_type_is_64bit(stype)) {
1588 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1589 indirect_index,
1590 swizzle + 1,
1591 TRUE);
1592 }
1593
1594 /* cast temps_array pointer to float* */
1595 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1596 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1597
1598 /* Gather values from the temporary register array */
1599 res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1600 }
1601 else {
1602 LLVMValueRef temp_ptr;
1603 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1604 res = LLVMBuildLoad(builder, temp_ptr, "");
1605
1606 if (tgsi_type_is_64bit(stype)) {
1607 LLVMValueRef temp_ptr2, res2;
1608
1609 temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
1610 res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1611 res = emit_fetch_64bit(bld_base, stype, res, res2);
1612 }
1613 }
1614
1615 if (stype == TGSI_TYPE_SIGNED ||
1616 stype == TGSI_TYPE_UNSIGNED ||
1617 stype == TGSI_TYPE_DOUBLE ||
1618 stype == TGSI_TYPE_SIGNED64 ||
1619 stype == TGSI_TYPE_UNSIGNED64) {
1620 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1621 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1622 }
1623
1624 return res;
1625 }
1626
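/*
 * Fetch a system value register (instance id, vertex id, primitive id,
 * ...), broadcasting scalar values to vectors where needed and bitcasting
 * the result to the type the instruction expects.
 */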
1627 static LLVMValueRef
1628 emit_fetch_system_value(
1629 struct lp_build_tgsi_context * bld_base,
1630 const struct tgsi_full_src_register * reg,
1631 enum tgsi_opcode_type stype,
1632 unsigned swizzle)
1633 {
1634 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1635 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1636 const struct tgsi_shader_info *info = bld->bld_base.info;
1637 LLVMBuilderRef builder = gallivm->builder;
1638 LLVMValueRef res;
1639 enum tgsi_opcode_type atype; /* actual type of the value */
1640
1641 assert(!reg->Register.Indirect);
1642
1643 switch (info->system_value_semantic_name[reg->Register.Index]) {
1644 case TGSI_SEMANTIC_INSTANCEID:
1645 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1646 atype = TGSI_TYPE_UNSIGNED;
1647 break;
1648
1649 case TGSI_SEMANTIC_VERTEXID:
1650 res = bld->system_values.vertex_id;
1651 atype = TGSI_TYPE_UNSIGNED;
1652 break;
1653
1654 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1655 res = bld->system_values.vertex_id_nobase;
1656 atype = TGSI_TYPE_UNSIGNED;
1657 break;
1658
1659 case TGSI_SEMANTIC_BASEVERTEX:
1660 res = bld->system_values.basevertex;
1661 atype = TGSI_TYPE_UNSIGNED;
1662 break;
1663
1664 case TGSI_SEMANTIC_PRIMID:
1665 res = bld->system_values.prim_id;
1666 atype = TGSI_TYPE_UNSIGNED;
1667 break;
1668
1669 case TGSI_SEMANTIC_INVOCATIONID:
1670 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1671 atype = TGSI_TYPE_UNSIGNED;
1672 break;
1673
1674 default:
1675 assert(!"unexpected semantic in emit_fetch_system_value");
1676 res = bld_base->base.zero;
1677 atype = TGSI_TYPE_FLOAT;
1678 break;
1679 }
1680
1681 if (atype != stype) {
1682 if (stype == TGSI_TYPE_FLOAT) {
1683 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1684 } else if (stype == TGSI_TYPE_UNSIGNED) {
1685 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1686 } else if (stype == TGSI_TYPE_SIGNED) {
1687 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1688 }
1689 }
1690
1691 return res;
1692 }
1693
1694 /**
1695 * Register fetch with derivatives.
1696 */
1697 static void
1698 emit_fetch_deriv(
1699 struct lp_build_tgsi_soa_context *bld,
1700 LLVMValueRef src,
1701 LLVMValueRef *res,
1702 LLVMValueRef *ddx,
1703 LLVMValueRef *ddy)
1704 {
1705 if (res)
1706 *res = src;
1707
1708 /* TODO: use interpolation coeffs for inputs */
1709
1710 if (ddx)
1711 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1712
1713 if (ddy)
1714 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1715 }
1716
1717 /**
1718 * Store a vector of 64-bit values into two float vectors,
1719 * i.e.
1720 * value is d0, d1, d2, d3 etc.
1721 * Each 64-bit value has low and high pieces x, y,
1722 * which get stored into the separate channels as:
1723 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1724 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1725 */
1726 static void
1727 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1728 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1729 LLVMValueRef value)
1730 {
1731 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1732 struct gallivm_state *gallivm = bld_base->base.gallivm;
1733 LLVMBuilderRef builder = gallivm->builder;
1734 struct lp_build_context *float_bld = &bld_base->base;
1735 unsigned i;
1736 LLVMValueRef temp, temp2;
1737 LLVMValueRef shuffles[8];
1738 LLVMValueRef shuffles2[8];
1739
1740 for (i = 0; i < bld_base->base.type.length; i++) {
1741 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1742 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1743 }
1744
1745 temp = LLVMBuildShuffleVector(builder, value,
1746 LLVMGetUndef(LLVMTypeOf(value)),
1747 LLVMConstVector(shuffles,
1748 bld_base->base.type.length),
1749 "");
1750 temp2 = LLVMBuildShuffleVector(builder, value,
1751 LLVMGetUndef(LLVMTypeOf(value)),
1752 LLVMConstVector(shuffles2,
1753 bld_base->base.type.length),
1754 "");
1755
1756 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1757 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1758 }
1759
1760 /**
1761 * Register store.
1762 */
1763 static void
1764 emit_store_chan(
1765 struct lp_build_tgsi_context *bld_base,
1766 const struct tgsi_full_instruction *inst,
1767 unsigned index,
1768 unsigned chan_index,
1769 LLVMValueRef value)
1770 {
1771 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1772 struct gallivm_state *gallivm = bld_base->base.gallivm;
1773 LLVMBuilderRef builder = gallivm->builder;
1774 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1775 struct lp_build_context *float_bld = &bld_base->base;
1776 struct lp_build_context *int_bld = &bld_base->int_bld;
1777 LLVMValueRef indirect_index = NULL;
1778 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1779
1780 /*
1781 * Apply saturation.
1782 *
1783 * The value is always assumed to be float.
1784 */
1785 if (inst->Instruction.Saturate) {
1786 assert(dtype == TGSI_TYPE_FLOAT ||
1787 dtype == TGSI_TYPE_UNTYPED);
1788 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1789 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1790 }
1791
1792 if (reg->Register.Indirect) {
1793 /*
1794 * Currently mesa/st doesn't generate indirect stores
1795 * of 64-bit values; it normally uses MOV to do indirect stores.
1796 */
1797 assert(!tgsi_type_is_64bit(dtype));
1798 indirect_index = get_indirect_index(bld,
1799 reg->Register.File,
1800 reg->Register.Index,
1801 &reg->Indirect);
1802 } else {
1803 assert(reg->Register.Index <=
1804 bld_base->info->file_max[reg->Register.File]);
1805 }
1806
1807 if (DEBUG_EXECUTION) {
1808 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1809 }
1810
1811 switch( reg->Register.File ) {
1812 case TGSI_FILE_OUTPUT:
1813 /* Outputs are always stored as floats */
1814 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1815
1816 if (reg->Register.Indirect) {
1817 LLVMValueRef index_vec; /* indexes into the output registers */
1818 LLVMValueRef outputs_array;
1819 LLVMTypeRef fptr_type;
1820
1821 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1822 indirect_index,
1823 chan_index,
1824 TRUE);
1825
1826 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1827 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1828
1829 /* Scatter store values into output registers */
1830 emit_mask_scatter(bld, outputs_array, index_vec, value,
1831 &bld->exec_mask);
1832 }
1833 else {
1834 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1835 chan_index);
1836
1837 if (tgsi_type_is_64bit(dtype)) {
1838 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1839 chan_index + 1);
1840 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1841 value);
1842 } else
1843 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1844 }
1845 break;
1846
1847 case TGSI_FILE_TEMPORARY:
1848 /* Temporaries are always stored as floats */
1849 if (!tgsi_type_is_64bit(dtype))
1850 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1851 else
1852 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1853
1854 if (reg->Register.Indirect) {
1855 LLVMValueRef index_vec; /* indexes into the temp registers */
1856 LLVMValueRef temps_array;
1857 LLVMTypeRef fptr_type;
1858
1859 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1860 indirect_index,
1861 chan_index,
1862 TRUE);
1863
1864 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1865 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1866
1867 /* Scatter store values into temp registers */
1868 emit_mask_scatter(bld, temps_array, index_vec, value,
1869 &bld->exec_mask);
1870 }
1871 else {
1872 LLVMValueRef temp_ptr;
1873 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1874
1875 if (tgsi_type_is_64bit(dtype)) {
1876 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1877 reg->Register.Index,
1878 chan_index + 1);
1879 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1880 value);
1881 }
1882 else
1883 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1884 }
1885 break;
1886
1887 case TGSI_FILE_ADDRESS:
1888 assert(dtype == TGSI_TYPE_SIGNED);
1889 assert(LLVMTypeOf(value) == int_bld->vec_type);
1890 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1891 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1892 bld->addr[reg->Register.Index][chan_index]);
1893 break;
1894
1895 default:
1896 assert( 0 );
1897 }
1898
1899 (void)dtype;
1900 }
1901
1902 /*
1903 * Called at the beginning of the translation of each TGSI instruction, to
1904 * emit some debug code.
1905 */
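/*
 * With DEBUG_EXECUTION enabled, the runtime trace looks like
 * (illustrative):
 *
 *   $ 5: MOV TEMP[0].xy, IN[0].xyyy
 *    mask = <0xffffffff, 0xffffffff, 0, 0>
 *    TEMP[0].x = <1.0, 2.0, ...>
 */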
1906 static void
1907 emit_debug(
1908 struct lp_build_tgsi_context * bld_base,
1909 const struct tgsi_full_instruction * inst,
1910 const struct tgsi_opcode_info * info)
1911
1912 {
1913 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1914
1915 if (DEBUG_EXECUTION) {
1916 /*
1917 * Dump the TGSI instruction.
1918 */
1919
1920 struct gallivm_state *gallivm = bld_base->base.gallivm;
1921 char buf[512];
1922 buf[0] = '$';
1923 buf[1] = ' ';
1924 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1925 lp_build_printf(gallivm, buf);
1926
1927 /* Dump the execution mask.
1928 */
1929 if (bld->exec_mask.has_mask) {
1930 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1931 }
1932 }
1933 }
1934
1935 static void
1936 emit_store(
1937 struct lp_build_tgsi_context * bld_base,
1938 const struct tgsi_full_instruction * inst,
1939 const struct tgsi_opcode_info * info,
1940 LLVMValueRef dst[4])
1941
1942 {
1943 unsigned chan_index;
1944 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1945
1946 if(info->num_dst) {
1947 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1948
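/* 64-bit values occupy two channels each; the high halves (y, w)
 * are written by emit_store_chan together with x / z, so skip them here.
 */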
1949 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1950 continue;
1951 emit_store_chan(bld_base, inst, 0, chan_index, dst[chan_index]);
1952 }
1953 }
1954 }
1955
1956 static unsigned
1957 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1958 {
1959 switch (tgsi_target) {
1960 case TGSI_TEXTURE_BUFFER:
1961 return PIPE_BUFFER;
1962 case TGSI_TEXTURE_1D:
1963 case TGSI_TEXTURE_SHADOW1D:
1964 return PIPE_TEXTURE_1D;
1965 case TGSI_TEXTURE_2D:
1966 case TGSI_TEXTURE_SHADOW2D:
1967 case TGSI_TEXTURE_2D_MSAA:
1968 return PIPE_TEXTURE_2D;
1969 case TGSI_TEXTURE_3D:
1970 return PIPE_TEXTURE_3D;
1971 case TGSI_TEXTURE_CUBE:
1972 case TGSI_TEXTURE_SHADOWCUBE:
1973 return PIPE_TEXTURE_CUBE;
1974 case TGSI_TEXTURE_RECT:
1975 case TGSI_TEXTURE_SHADOWRECT:
1976 return PIPE_TEXTURE_RECT;
1977 case TGSI_TEXTURE_1D_ARRAY:
1978 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1979 return PIPE_TEXTURE_1D_ARRAY;
1980 case TGSI_TEXTURE_2D_ARRAY:
1981 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1982 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1983 return PIPE_TEXTURE_2D_ARRAY;
1984 case TGSI_TEXTURE_CUBE_ARRAY:
1985 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1986 return PIPE_TEXTURE_CUBE_ARRAY;
1987 default:
1988 assert(0);
1989 return PIPE_BUFFER;
1990 }
1991 }
1992
1993
1994 static enum lp_sampler_lod_property
1995 lp_build_lod_property(
1996 struct lp_build_tgsi_context *bld_base,
1997 const struct tgsi_full_instruction *inst,
1998 unsigned src_op)
1999 {
2000 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2001 enum lp_sampler_lod_property lod_property;
2002
2003 /*
2004 * Not much we can do here. We could try catching inputs declared
2005 * with constant interpolation, but it's not clear it's worth it - since
2006 * for TEX opcodes as well as FETCH/LD the lod comes from the same reg
2007 * as the coords, it could only work for SAMPLE/TXQ/SVIEWINFO, just
2008 * like the constant/immediate recognition below.
2009 * What would be of more value is recognizing temps holding
2010 * broadcasted scalars, but there's no way we can do that.
2011 * Tried asking llvm but without any success (using LLVMIsConstant,
2012 * even though this isn't exactly what we'd need); even as simple as
2013 * IMM[0] UINT32 (0,-1,0,0)
2014 * MOV TEMP[0] IMM[0].yyyy
2015 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2016 * doesn't work.
2017 * This means there's ZERO chance this will ever catch a scalar lod
2018 * with traditional tex opcodes as well as texel fetches, since the lod
2019 * comes from the same reg as the coords (except maybe for some test
2020 * shaders using constant coords).
2021 * There's at least hope for sample opcodes as well as size queries.
2022 */
2023 if (reg->Register.File == TGSI_FILE_CONSTANT ||
2024 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2025 lod_property = LP_SAMPLER_LOD_SCALAR;
2026 }
2027 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2028 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2029 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2030 }
2031 else {
2032 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2033 }
2034 }
2035 else {
2036 /* never use scalar (per-quad) lod, the results are just too wrong. */
2037 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2038 }
2039 return lod_property;
2040 }
2041
2042
2043 /**
2044 * High-level instruction translators.
2045 */
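/*
 * For reference, the classic tex opcodes handled below look like this
 * in TGSI text form (illustrative; operand packing as described in
 * emit_tex):
 *
 *   TEX TEMP[0], IN[0], SAMP[0], 2D                 <- plain sample
 *   TXB TEMP[0], IN[0], SAMP[0], 2D                 <- lod bias in IN[0].w
 *   TXL TEMP[0], IN[0], SAMP[0], 2D                 <- explicit lod in IN[0].w
 *   TXP TEMP[0], IN[0], SAMP[0], 2D                 <- coords divided by IN[0].w
 *   TXD TEMP[0], IN[0], IN[1], IN[2], SAMP[0], 2D   <- explicit derivatives
 */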
2046
2047 static void
2048 emit_tex( struct lp_build_tgsi_soa_context *bld,
2049 const struct tgsi_full_instruction *inst,
2050 enum lp_build_tex_modifier modifier,
2051 LLVMValueRef *texel,
2052 unsigned sampler_reg,
2053 enum lp_sampler_op_type sampler_op)
2054 {
2055 unsigned unit = inst->Src[sampler_reg].Register.Index;
2056 LLVMValueRef oow = NULL;
2057 LLVMValueRef lod = NULL;
2058 LLVMValueRef coords[5];
2059 LLVMValueRef offsets[3] = { NULL };
2060 struct lp_derivatives derivs;
2061 struct lp_sampler_params params;
2062 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2063 unsigned num_derivs, num_offsets, i;
2064 unsigned shadow_coord = 0;
2065 unsigned layer_coord = 0;
2066 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2067
2068 memset(&params, 0, sizeof(params));
2069
2070 if (!bld->sampler) {
2071 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2072 for (i = 0; i < 4; i++) {
2073 texel[i] = bld->bld_base.base.undef;
2074 }
2075 return;
2076 }
2077
2078 switch (inst->Texture.Texture) {
2079 case TGSI_TEXTURE_1D_ARRAY:
2080 layer_coord = 1;
2081 /* fallthrough */
2082 case TGSI_TEXTURE_1D:
2083 num_offsets = 1;
2084 num_derivs = 1;
2085 break;
2086 case TGSI_TEXTURE_2D_ARRAY:
2087 layer_coord = 2;
2088 /* fallthrough */
2089 case TGSI_TEXTURE_2D:
2090 case TGSI_TEXTURE_RECT:
2091 num_offsets = 2;
2092 num_derivs = 2;
2093 break;
2094 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2095 layer_coord = 1;
2096 /* fallthrough */
2097 case TGSI_TEXTURE_SHADOW1D:
2098 shadow_coord = 2;
2099 num_offsets = 1;
2100 num_derivs = 1;
2101 break;
2102 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2103 layer_coord = 2;
2104 shadow_coord = 3;
2105 num_offsets = 2;
2106 num_derivs = 2;
2107 break;
2108 case TGSI_TEXTURE_SHADOW2D:
2109 case TGSI_TEXTURE_SHADOWRECT:
2110 shadow_coord = 2;
2111 num_offsets = 2;
2112 num_derivs = 2;
2113 break;
2114 case TGSI_TEXTURE_CUBE:
2115 num_offsets = 2;
2116 num_derivs = 3;
2117 break;
2118 case TGSI_TEXTURE_3D:
2119 num_offsets = 3;
2120 num_derivs = 3;
2121 break;
2122 case TGSI_TEXTURE_SHADOWCUBE:
2123 shadow_coord = 3;
2124 num_offsets = 2;
2125 num_derivs = 3;
2126 break;
2127 case TGSI_TEXTURE_CUBE_ARRAY:
2128 num_offsets = 2;
2129 num_derivs = 3;
2130 layer_coord = 3;
2131 break;
2132 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2133 num_offsets = 2;
2134 num_derivs = 3;
2135 layer_coord = 3;
2136 shadow_coord = 4; /* the shadow coord comes from a separate reg */
2137 break;
2138 case TGSI_TEXTURE_2D_MSAA:
2139 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2140 default:
2141 assert(0);
2142 return;
2143 }
2144
2145 /* Note lod and especially projected are illegal in a LOT of cases */
2146 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2147 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2148 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2149 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2150 /* note that shadow cube array with bias/explicit lod does not exist */
2151 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2152 }
2153 else {
2154 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2155 }
2156 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2157 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2158 }
2159 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2160 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2161 }
2162 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2163 }
2164
2165 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2166 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2167 oow = lp_build_rcp(&bld->bld_base.base, oow);
2168 }
2169
2170 for (i = 0; i < num_derivs; i++) {
2171 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2172 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2173 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2174 }
2175 for (i = num_derivs; i < 5; i++) {
2176 coords[i] = bld->bld_base.base.undef;
2177 }
2178
2179 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2180 if (layer_coord) {
2181 if (layer_coord == 3) {
2182 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2183 }
2184 else {
2185 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2186 }
2187 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2188 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2189 }
2190 /* Shadow coord always occupies the 5th slot. */
2191 if (shadow_coord) {
2192 sample_key |= LP_SAMPLER_SHADOW;
2193 if (shadow_coord == 4) {
2194 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2195 }
2196 else {
2197 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2198 }
2199 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2200 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2201 }
2202
2203 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2204 unsigned dim;
2205 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2206 for (dim = 0; dim < num_derivs; ++dim) {
2207 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2208 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2209 }
2210 params.derivs = &derivs;
2211 /*
2212 * could also check all src regs if constant but I doubt such
2213 * cases exist in practice.
2214 */
2215 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2216 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2217 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2218 }
2219 else {
2220 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2221 }
2222 }
2223 else {
2224 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2225 }
2226 }
2227 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2228
2229 /* we don't handle the 4 offset version of tg4 */
2230 if (inst->Texture.NumOffsets == 1) {
2231 unsigned dim;
2232 sample_key |= LP_SAMPLER_OFFSETS;
2233 for (dim = 0; dim < num_offsets; dim++) {
2234 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2235 }
2236 }
2237
2238 params.type = bld->bld_base.base.type;
2239 params.sample_key = sample_key;
2240 params.texture_index = unit;
2241 params.sampler_index = unit;
2242 params.context_ptr = bld->context_ptr;
2243 params.thread_data_ptr = bld->thread_data_ptr;
2244 params.coords = coords;
2245 params.offsets = offsets;
2246 params.lod = lod;
2247 params.texel = texel;
2248
2249 bld->sampler->emit_tex_sample(bld->sampler,
2250 bld->bld_base.base.gallivm,
2251 &params);
2252 }
2253
2254 static void
2255 emit_sample(struct lp_build_tgsi_soa_context *bld,
2256 const struct tgsi_full_instruction *inst,
2257 enum lp_build_tex_modifier modifier,
2258 boolean compare,
2259 LLVMValueRef *texel)
2260 {
2261 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2262 unsigned texture_unit, sampler_unit;
2263 LLVMValueRef lod = NULL;
2264 LLVMValueRef coords[5];
2265 LLVMValueRef offsets[3] = { NULL };
2266 struct lp_derivatives derivs;
2267 struct lp_sampler_params params;
2268 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2269
2270 unsigned num_offsets, num_derivs, i;
2271 unsigned layer_coord = 0;
2272 unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT;
2273
2274 memset(&params, 0, sizeof(params));
2275
2276 if (!bld->sampler) {
2277 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2278 for (i = 0; i < 4; i++) {
2279 texel[i] = bld->bld_base.base.undef;
2280 }
2281 return;
2282 }
2283
2284 /*
2285 * Unlike old-style tex opcodes, the texture/sampler indices
2286 * always come from src1 and src2 respectively.
2287 */
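/*
 * e.g. (illustrative): SAMPLE TEMP[0], TEMP[1], SVIEW[0], SAMP[0]
 * with the coords in TEMP[1], the view in SVIEW[0] and the sampler
 * state in SAMP[0].
 */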
2288 texture_unit = inst->Src[1].Register.Index;
2289 sampler_unit = inst->Src[2].Register.Index;
2290
2291 /*
2292 * Note inst->Texture.Texture will contain the number of offsets;
2293 * however, the target information is NOT there and comes from the
2294 * declared sampler views instead.
2295 */
2296 switch (bld->sv[texture_unit].Resource) {
2297 case TGSI_TEXTURE_1D:
2298 num_offsets = 1;
2299 num_derivs = 1;
2300 break;
2301 case TGSI_TEXTURE_1D_ARRAY:
2302 layer_coord = 1;
2303 num_offsets = 1;
2304 num_derivs = 1;
2305 break;
2306 case TGSI_TEXTURE_2D:
2307 case TGSI_TEXTURE_RECT:
2308 num_offsets = 2;
2309 num_derivs = 2;
2310 break;
2311 case TGSI_TEXTURE_2D_ARRAY:
2312 layer_coord = 2;
2313 num_offsets = 2;
2314 num_derivs = 2;
2315 break;
2316 case TGSI_TEXTURE_CUBE:
2317 num_offsets = 2;
2318 num_derivs = 3;
2319 break;
2320 case TGSI_TEXTURE_3D:
2321 num_offsets = 3;
2322 num_derivs = 3;
2323 break;
2324 case TGSI_TEXTURE_CUBE_ARRAY:
2325 layer_coord = 3;
2326 num_offsets = 2;
2327 num_derivs = 3;
2328 break;
2329 default:
2330 assert(0);
2331 return;
2332 }
2333
2334 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2335 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2336 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2337 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2338 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2339 }
2340 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2341 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2342 }
2343 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2344 }
2345 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2346 /* XXX might be better to explicitly pass the level zero information */
2347 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2348 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2349 }
2350
2351 for (i = 0; i < num_derivs; i++) {
2352 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2353 }
2354 for (i = num_derivs; i < 5; i++) {
2355 coords[i] = bld->bld_base.base.undef;
2356 }
2357
2358 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2359 if (layer_coord) {
2360 if (layer_coord == 3)
2361 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2362 else
2363 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2364 }
2365 /* Shadow coord always occupies the 5th slot. */
2366 if (compare) {
2367 sample_key |= LP_SAMPLER_SHADOW;
2368 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2369 }
2370
2371 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2372 unsigned dim;
2373 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2374 for (dim = 0; dim < num_derivs; ++dim) {
2375 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2376 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2377 }
2378 params.derivs = &derivs;
2379 /*
2380 * could also check all src regs if constant but I doubt such
2381 * cases exist in practice.
2382 */
2383 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2384 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2385 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2386 }
2387 else {
2388 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2389 }
2390 }
2391 else {
2392 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2393 }
2394 }
2395
2396 /* some advanced gather instructions (txgo) would require 4 offsets */
2397 if (inst->Texture.NumOffsets == 1) {
2398 unsigned dim;
2399 sample_key |= LP_SAMPLER_OFFSETS;
2400 for (dim = 0; dim < num_offsets; dim++) {
2401 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2402 }
2403 }
2404 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2405
2406 params.type = bld->bld_base.base.type;
2407 params.sample_key = sample_key;
2408 params.texture_index = texture_unit;
2409 params.sampler_index = sampler_unit;
2410 params.context_ptr = bld->context_ptr;
2411 params.thread_data_ptr = bld->thread_data_ptr;
2412 params.coords = coords;
2413 params.offsets = offsets;
2414 params.lod = lod;
2415 params.texel = texel;
2416
2417 bld->sampler->emit_tex_sample(bld->sampler,
2418 bld->bld_base.base.gallivm,
2419 &params);
2420
2421 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2422 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2423 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2424 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2425 unsigned char swizzles[4];
2426 swizzles[0] = inst->Src[1].Register.SwizzleX;
2427 swizzles[1] = inst->Src[1].Register.SwizzleY;
2428 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2429 swizzles[3] = inst->Src[1].Register.SwizzleW;
2430
2431 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2432 }
2433 }
2434
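/*
 * Translate TXF / SAMPLE_I style texel fetches: integer coords, no
 * filtering, with an explicit lod for all but buffer and msaa targets,
 * e.g. (illustrative) "TXF TEMP[0], TEMP[1], SAMP[0], 2D".
 */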
2435 static void
2436 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2437 const struct tgsi_full_instruction *inst,
2438 LLVMValueRef *texel,
2439 boolean is_samplei)
2440 {
2441 unsigned unit, target;
2442 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2443 LLVMValueRef explicit_lod = NULL;
2444 LLVMValueRef coords[5];
2445 LLVMValueRef offsets[3] = { NULL };
2446 struct lp_sampler_params params;
2447 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2448 unsigned dims, i;
2449 unsigned layer_coord = 0;
2450 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2451
2452 memset(&params, 0, sizeof(params));
2453
2454 if (!bld->sampler) {
2455 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2456 for (i = 0; i < 4; i++) {
2457 texel[i] = coord_undef;
2458 }
2459 return;
2460 }
2461
2462 unit = inst->Src[1].Register.Index;
2463
2464 if (is_samplei) {
2465 target = bld->sv[unit].Resource;
2466 }
2467 else {
2468 target = inst->Texture.Texture;
2469 }
2470
2471 switch (target) {
2472 case TGSI_TEXTURE_1D:
2473 case TGSI_TEXTURE_BUFFER:
2474 dims = 1;
2475 break;
2476 case TGSI_TEXTURE_1D_ARRAY:
2477 layer_coord = 1;
2478 dims = 1;
2479 break;
2480 case TGSI_TEXTURE_2D:
2481 case TGSI_TEXTURE_RECT:
2482 case TGSI_TEXTURE_2D_MSAA:
2483 dims = 2;
2484 break;
2485 case TGSI_TEXTURE_2D_ARRAY:
2486 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2487 layer_coord = 2;
2488 dims = 2;
2489 break;
2490 case TGSI_TEXTURE_3D:
2491 dims = 3;
2492 break;
2493 default:
2494 assert(0);
2495 return;
2496 }
2497
2498 /* we always have a lod, except for buffers and msaa targets? */
2499 if (target != TGSI_TEXTURE_BUFFER &&
2500 target != TGSI_TEXTURE_2D_MSAA &&
2501 target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2502 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2503 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2504 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2505 }
2506 /*
2507 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2508 * would be the sample index.
2509 */
2510
2511 for (i = 0; i < dims; i++) {
2512 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2513 }
2514 /* we never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway */
2515 for (i = dims; i < 5; i++) {
2516 coords[i] = coord_undef;
2517 }
2518 if (layer_coord)
2519 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2520
2521 if (inst->Texture.NumOffsets == 1) {
2522 unsigned dim;
2523 sample_key |= LP_SAMPLER_OFFSETS;
2524 for (dim = 0; dim < dims; dim++) {
2525 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2526 }
2527 }
2528 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2529
2530 params.type = bld->bld_base.base.type;
2531 params.sample_key = sample_key;
2532 params.texture_index = unit;
2533 /*
2534 * The sampler is not actually used; set it to 0 so it won't exceed
2535 * PIPE_MAX_SAMPLERS and trigger assertions with d3d10, where the
2536 * sampler view number can exceed this.
2537 */
2538 params.sampler_index = 0;
2539 params.context_ptr = bld->context_ptr;
2540 params.thread_data_ptr = bld->thread_data_ptr;
2541 params.coords = coords;
2542 params.offsets = offsets;
2543 params.derivs = NULL;
2544 params.lod = explicit_lod;
2545 params.texel = texel;
2546
2547 bld->sampler->emit_tex_sample(bld->sampler,
2548 bld->bld_base.base.gallivm,
2549 &params);
2550
2551 if (is_samplei &&
2552 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2553 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2554 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2555 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2556 unsigned char swizzles[4];
2557 swizzles[0] = inst->Src[1].Register.SwizzleX;
2558 swizzles[1] = inst->Src[1].Register.SwizzleY;
2559 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2560 swizzles[3] = inst->Src[1].Register.SwizzleW;
2561
2562 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2563 }
2564 }
2565
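/*
 * Translate TXQ / SVIEWINFO size queries: the mip level to query comes
 * from src0 (for targets that have mip levels at all) and the resulting
 * sizes are returned per channel in sizes_out.
 */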
2566 static void
2567 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2568 const struct tgsi_full_instruction *inst,
2569 LLVMValueRef *sizes_out,
2570 boolean is_sviewinfo)
2571 {
2572 LLVMValueRef explicit_lod;
2573 enum lp_sampler_lod_property lod_property;
2574 unsigned has_lod;
2575 unsigned i;
2576 unsigned unit = inst->Src[1].Register.Index;
2577 unsigned target, pipe_target;
2578 struct lp_sampler_size_query_params params;
2579
2580 if (is_sviewinfo) {
2581 target = bld->sv[unit].Resource;
2582 }
2583 else {
2584 target = inst->Texture.Texture;
2585 }
2586 switch (target) {
2587 case TGSI_TEXTURE_BUFFER:
2588 case TGSI_TEXTURE_RECT:
2589 case TGSI_TEXTURE_SHADOWRECT:
2590 has_lod = 0;
2591 break;
2592 default:
2593 has_lod = 1;
2594 break;
2595 }
2596
2597 if (!bld->sampler) {
2598 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2599 for (i = 0; i < 4; i++)
2600 sizes_out[i] = bld->bld_base.int_bld.undef;
2601 return;
2602 }
2603
2604 if (has_lod) {
2605 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2606 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2607 }
2608 else {
2609 explicit_lod = NULL;
2610 lod_property = LP_SAMPLER_LOD_SCALAR;
2611 }
2612
2613
2614 pipe_target = tgsi_to_pipe_tex_target(target);
2615
2616 params.int_type = bld->bld_base.int_bld.type;
2617 params.texture_unit = unit;
2618 params.target = pipe_target;
2619 params.context_ptr = bld->context_ptr;
2620 params.is_sviewinfo = TRUE;
2621 params.lod_property = lod_property;
2622 params.explicit_lod = explicit_lod;
2623 params.sizes_out = sizes_out;
2624
2625 bld->sampler->emit_size_query(bld->sampler,
2626 bld->bld_base.base.gallivm,
2627 &params);
2628 }
2629
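/*
 * Return TRUE if the shader is about to end (or at least does no
 * further texturing or control flow) within the next few instructions,
 * in which case the mask check after a kill can be skipped.
 */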
2630 static boolean
2631 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2632 int pc)
2633 {
2634 unsigned i;
2635
2636 for (i = 0; i < 5; i++) {
2637 unsigned opcode;
2638
2639 if (pc + i >= bld->bld_base.info->num_instructions)
2640 return TRUE;
2641
2642 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2643
2644 if (opcode == TGSI_OPCODE_END)
2645 return TRUE;
2646
2647 if (opcode == TGSI_OPCODE_TEX ||
2648 opcode == TGSI_OPCODE_TXP ||
2649 opcode == TGSI_OPCODE_TXD ||
2650 opcode == TGSI_OPCODE_TXB ||
2651 opcode == TGSI_OPCODE_TXL ||
2652 opcode == TGSI_OPCODE_TXF ||
2653 opcode == TGSI_OPCODE_TXQ ||
2654 opcode == TGSI_OPCODE_TEX2 ||
2655 opcode == TGSI_OPCODE_TXB2 ||
2656 opcode == TGSI_OPCODE_TXL2 ||
2657 opcode == TGSI_OPCODE_SAMPLE ||
2658 opcode == TGSI_OPCODE_SAMPLE_B ||
2659 opcode == TGSI_OPCODE_SAMPLE_C ||
2660 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2661 opcode == TGSI_OPCODE_SAMPLE_D ||
2662 opcode == TGSI_OPCODE_SAMPLE_I ||
2663 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2664 opcode == TGSI_OPCODE_SAMPLE_L ||
2665 opcode == TGSI_OPCODE_SVIEWINFO ||
2666 opcode == TGSI_OPCODE_CAL ||
2667 opcode == TGSI_OPCODE_CALLNZ ||
2668 opcode == TGSI_OPCODE_IF ||
2669 opcode == TGSI_OPCODE_UIF ||
2670 opcode == TGSI_OPCODE_BGNLOOP ||
2671 opcode == TGSI_OPCODE_SWITCH)
2672 return FALSE;
2673 }
2674
2675 return TRUE;
2676 }
2677
2678
2679
2680 /**
2681 * Kill fragment if any of the src register values are negative.
2682 */
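/*
 * e.g. for "KILL_IF TEMP[0].xxxx" (illustrative) only a single term is
 * built, since repeated swizzle components are tested only once.
 */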
2683 static void
2684 emit_kill_if(
2685 struct lp_build_tgsi_soa_context *bld,
2686 const struct tgsi_full_instruction *inst,
2687 int pc)
2688 {
2689 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2690 const struct tgsi_full_src_register *reg = &inst->Src[0];
2691 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2692 LLVMValueRef mask;
2693 unsigned chan_index;
2694
2695 memset(&terms, 0, sizeof terms);
2696
2697 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2698 unsigned swizzle;
2699
2700 /* Unswizzle channel */
2701 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2702
2703 /* Check if the component has not already been tested. */
2704 assert(swizzle < TGSI_NUM_CHANNELS);
2705 if( !terms[swizzle] )
2706 /* TODO: change the comparison operator instead of setting the sign */
2707 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2708 }
2709
2710 mask = NULL;
2711 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2712 if(terms[chan_index]) {
2713 LLVMValueRef chan_mask;
2714
2715 /*
2716 * If term < 0 then mask = 0 else mask = ~0.
2717 */
2718 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2719
2720 if(mask)
2721 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2722 else
2723 mask = chan_mask;
2724 }
2725 }
2726
2727 if (bld->exec_mask.has_mask) {
2728 LLVMValueRef invmask;
2729 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2730 mask = LLVMBuildOr(builder, mask, invmask, "");
2731 }
2732
2733 lp_build_mask_update(bld->mask, mask);
2734 if (!near_end_of_shader(bld, pc))
2735 lp_build_mask_check(bld->mask);
2736 }
2737
2738
2739 /**
2740 * Unconditional fragment kill.
2741 * The only predication is the execution mask which will apply if
2742 * we're inside a loop or conditional.
2743 */
2744 static void
2745 emit_kill(struct lp_build_tgsi_soa_context *bld,
2746 int pc)
2747 {
2748 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2749 LLVMValueRef mask;
2750
2751 /* For those channels which are "alive", disable fragment shader
2752 * execution.
2753 */
2754 if (bld->exec_mask.has_mask) {
2755 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2756 }
2757 else {
2758 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2759 mask = zero;
2760 }
2761
2762 lp_build_mask_update(bld->mask, mask);
2763
2764 if (!near_end_of_shader(bld, pc))
2765 lp_build_mask_check(bld->mask);
2766 }
2767
2768
2769 /**
2770 * Emit code which will dump the values of all the registers in the
2771 * given file to stdout.
2772 */
2773 static void
2774 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2775 unsigned file)
2776 {
2777 const struct tgsi_shader_info *info = bld->bld_base.info;
2778 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2779 LLVMBuilderRef builder = gallivm->builder;
2780 LLVMValueRef reg_ptr;
2781 int index;
2782 int max_index = info->file_max[file];
2783
2784 /*
2785 * Some register files, particularly constants, can be very large,
2786 * and dumping everything could make this unusably slow.
2787 */
2788 max_index = MIN2(max_index, 32);
2789
2790 for (index = 0; index <= max_index; index++) {
2791 LLVMValueRef res;
2792 unsigned mask;
2793 int chan;
2794
2795 if (index < 8 * sizeof(unsigned) &&
2796 (info->file_mask[file] & (1u << index)) == 0) {
2797 /* This was not declared. */
2798 continue;
2799 }
2800
2801 if (file == TGSI_FILE_INPUT) {
2802 mask = info->input_usage_mask[index];
2803 } else {
2804 mask = TGSI_WRITEMASK_XYZW;
2805 }
2806
2807 for (chan = 0; chan < 4; chan++) {
2808 if ((mask & (1 << chan)) == 0) {
2809 /* This channel is not used. */
2810 continue;
2811 }
2812
2813 if (file == TGSI_FILE_CONSTANT) {
2814 struct tgsi_full_src_register reg;
2815 memset(&reg, 0, sizeof reg);
2816 reg.Register.File = file;
2817 reg.Register.Index = index;
2818 reg.Register.SwizzleX = 0;
2819 reg.Register.SwizzleY = 1;
2820 reg.Register.SwizzleZ = 2;
2821 reg.Register.SwizzleW = 3;
2822
2823 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2824 if (!res) {
2825 continue;
2826 }
2827 } else if (file == TGSI_FILE_INPUT) {
2828 res = bld->inputs[index][chan];
2829 if (!res) {
2830 continue;
2831 }
2832 } else if (file == TGSI_FILE_TEMPORARY) {
2833 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2834 assert(reg_ptr);
2835 res = LLVMBuildLoad(builder, reg_ptr, "");
2836 } else if (file == TGSI_FILE_OUTPUT) {
2837 reg_ptr = lp_get_output_ptr(bld, index, chan);
2838 assert(reg_ptr);
2839 res = LLVMBuildLoad(builder, reg_ptr, "");
2840 } else {
2841 assert(0);
2842 continue;
2843 }
2844
2845 emit_dump_reg(gallivm, file, index, chan, res);
2846 }
2847 }
2848 }
2849
2850
2851
2852 void
2853 lp_emit_declaration_soa(
2854 struct lp_build_tgsi_context *bld_base,
2855 const struct tgsi_full_declaration *decl)
2856 {
2857 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2858 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2859 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2860 const unsigned first = decl->Range.First;
2861 const unsigned last = decl->Range.Last;
2862 unsigned idx, i;
2863
2864 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2865
2866 switch (decl->Declaration.File) {
2867 case TGSI_FILE_TEMPORARY:
2868 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2869 assert(last < LP_MAX_INLINED_TEMPS);
2870 for (idx = first; idx <= last; ++idx) {
2871 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2872 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2873 }
2874 }
2875 break;
2876
2877 case TGSI_FILE_OUTPUT:
2878 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2879 for (idx = first; idx <= last; ++idx) {
2880 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2881 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2882 vec_type, "output");
2883 }
2884 }
2885 break;
2886
2887 case TGSI_FILE_ADDRESS:
2888 /* ADDR registers are only allocated with an integer LLVM IR type,
2889 * as they are guaranteed to always hold integers.
2890 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2891 * an ADDR register for that matter).
2892 */
2893 assert(last < LP_MAX_TGSI_ADDRS);
2894 for (idx = first; idx <= last; ++idx) {
2895 assert(idx < LP_MAX_TGSI_ADDRS);
2896 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2897 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2898 }
2899 break;
2900
2901 case TGSI_FILE_SAMPLER_VIEW:
2902 /*
2903 * The target stored here MUST match whatever is actually in the
2904 * bound sampler views (what about the return type?).
2905 */
2906 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2907 for (idx = first; idx <= last; ++idx) {
2908 bld->sv[idx] = decl->SamplerView;
2909 }
2910 break;
2911
2912 case TGSI_FILE_CONSTANT:
2913 {
2914 /*
2915 * We could trivially fetch the per-buffer pointer when fetching the
2916 * constant, relying on llvm to figure out it's always the same pointer
2917 * anyway. However, doing so results in a huge (more than factor of 10)
2918 * slowdown in llvm compilation times for some (but not all) shaders
2919 * (more specifically, the IR optimization spends way more time in
2920 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2921 */
2922 unsigned idx2D = decl->Dim.Index2D;
2923 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2924 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2925 bld->consts[idx2D] =
2926 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2927 bld->consts_sizes[idx2D] =
2928 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2929 }
2930 break;
2931
2932 default:
2933 /* don't need to declare other vars */
2934 break;
2935 }
2936 }
2937
2938
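/*
 * Translate a TGSI immediate into per-channel constant vectors: e.g.
 * (illustrative) "IMM[0] FLT32 {1.0000, 0.5000, 0.0000, 1.0000}"
 * becomes four broadcast vectors, one per channel, kept inline or
 * copied into the imms_array alloca when immediates are addressed
 * indirectly.
 */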
2939 void lp_emit_immediate_soa(
2940 struct lp_build_tgsi_context *bld_base,
2941 const struct tgsi_full_immediate *imm)
2942 {
2943 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2944 struct gallivm_state * gallivm = bld_base->base.gallivm;
2945 LLVMValueRef imms[4];
2946 unsigned i;
2947 const uint size = imm->Immediate.NrTokens - 1;
2948 assert(size <= 4);
2949 switch (imm->Immediate.DataType) {
2950 case TGSI_IMM_FLOAT32:
2951 for( i = 0; i < size; ++i )
2952 imms[i] =
2953 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2954
2955 break;
2956 case TGSI_IMM_FLOAT64:
2957 case TGSI_IMM_UINT64:
2958 case TGSI_IMM_INT64:
2959 case TGSI_IMM_UINT32:
2960 for( i = 0; i < size; ++i ) {
2961 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2962 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2963 }
2964
2965 break;
2966 case TGSI_IMM_INT32:
2967 for( i = 0; i < size; ++i ) {
2968 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2969 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2970 }
2971
2972 break;
2973 }
2974 for( i = size; i < 4; ++i )
2975 imms[i] = bld_base->base.undef;
2976
2977 if (bld->use_immediates_array) {
2978 unsigned index = bld->num_immediates;
2979 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2980 LLVMBuilderRef builder = gallivm->builder;
2981
2982 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
2983 for (i = 0; i < 4; ++i ) {
2984 LLVMValueRef lindex = lp_build_const_int32(
2985 bld->bld_base.base.gallivm, index * 4 + i);
2986 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2987 bld->imms_array, &lindex, 1, "");
2988 LLVMBuildStore(builder, imms[i], imm_ptr);
2989 }
2990 } else {
2991 /* simply copy the immediate values into the next immediates[] slot */
2992 unsigned i;
2993 assert(imm->Immediate.NrTokens - 1 <= 4);
2994 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
2995
2996 for(i = 0; i < 4; ++i )
2997 bld->immediates[bld->num_immediates][i] = imms[i];
2998
2999 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3000 unsigned index = bld->num_immediates;
3001 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3002 LLVMBuilderRef builder = gallivm->builder;
3003 for (i = 0; i < 4; ++i ) {
3004 LLVMValueRef lindex = lp_build_const_int32(
3005 bld->bld_base.base.gallivm, index * 4 + i);
3006 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3007 bld->imms_array, &lindex, 1, "");
3008 LLVMBuildStore(builder,
3009 bld->immediates[index][i],
3010 imm_ptr);
3011 }
3012 }
3013 }
3014
3015 bld->num_immediates++;
3016 }
3017
3018 static void
3019 ddx_emit(
3020 const struct lp_build_tgsi_action * action,
3021 struct lp_build_tgsi_context * bld_base,
3022 struct lp_build_emit_data * emit_data)
3023 {
3024 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3025
3026 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3027 &emit_data->output[emit_data->chan], NULL);
3028 }
3029
3030 static void
3031 ddy_emit(
3032 const struct lp_build_tgsi_action * action,
3033 struct lp_build_tgsi_context * bld_base,
3034 struct lp_build_emit_data * emit_data)
3035 {
3036 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3037
3038 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3039 &emit_data->output[emit_data->chan]);
3040 }
3041
3042 static void
3043 kill_emit(
3044 const struct lp_build_tgsi_action * action,
3045 struct lp_build_tgsi_context * bld_base,
3046 struct lp_build_emit_data * emit_data)
3047 {
3048 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3049
3050 emit_kill(bld, bld_base->pc - 1);
3051 }
3052
3053 static void
3054 kill_if_emit(
3055 const struct lp_build_tgsi_action * action,
3056 struct lp_build_tgsi_context * bld_base,
3057 struct lp_build_emit_data * emit_data)
3058 {
3059 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3060
3061 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3062 }
3063
3064 static void
3065 tex_emit(
3066 const struct lp_build_tgsi_action * action,
3067 struct lp_build_tgsi_context * bld_base,
3068 struct lp_build_emit_data * emit_data)
3069 {
3070 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3071
3072 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3073 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3074 }
3075
3076 static void
3077 tex2_emit(
3078 const struct lp_build_tgsi_action * action,
3079 struct lp_build_tgsi_context * bld_base,
3080 struct lp_build_emit_data * emit_data)
3081 {
3082 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3083
3084 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3085 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3086 }
3087
3088 static void
3089 txb_emit(
3090 const struct lp_build_tgsi_action * action,
3091 struct lp_build_tgsi_context * bld_base,
3092 struct lp_build_emit_data * emit_data)
3093 {
3094 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3095
3096 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3097 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3098 }
3099
3100 static void
3101 txb2_emit(
3102 const struct lp_build_tgsi_action * action,
3103 struct lp_build_tgsi_context * bld_base,
3104 struct lp_build_emit_data * emit_data)
3105 {
3106 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3107
3108 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3109 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3110 }
3111
3112 static void
3113 txd_emit(
3114 const struct lp_build_tgsi_action * action,
3115 struct lp_build_tgsi_context * bld_base,
3116 struct lp_build_emit_data * emit_data)
3117 {
3118 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3119
3120 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3121 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3122 }
3123
3124 static void
3125 txl_emit(
3126 const struct lp_build_tgsi_action * action,
3127 struct lp_build_tgsi_context * bld_base,
3128 struct lp_build_emit_data * emit_data)
3129 {
3130 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3131
3132 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3133 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3134 }
3135
3136 static void
3137 txl2_emit(
3138 const struct lp_build_tgsi_action * action,
3139 struct lp_build_tgsi_context * bld_base,
3140 struct lp_build_emit_data * emit_data)
3141 {
3142 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3143
3144 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3145 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3146 }
3147
3148 static void
3149 txp_emit(
3150 const struct lp_build_tgsi_action * action,
3151 struct lp_build_tgsi_context * bld_base,
3152 struct lp_build_emit_data * emit_data)
3153 {
3154 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3155
3156 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3157 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3158 }
3159
3160 static void
3161 tg4_emit(
3162 const struct lp_build_tgsi_action * action,
3163 struct lp_build_tgsi_context * bld_base,
3164 struct lp_build_emit_data * emit_data)
3165 {
3166 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3167
3168 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3169 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3170 }
3171
3172 static void
3173 txq_emit(
3174 const struct lp_build_tgsi_action * action,
3175 struct lp_build_tgsi_context * bld_base,
3176 struct lp_build_emit_data * emit_data)
3177 {
3178 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3179
3180 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3181 }
3182
3183 static void
3184 txf_emit(
3185 const struct lp_build_tgsi_action * action,
3186 struct lp_build_tgsi_context * bld_base,
3187 struct lp_build_emit_data * emit_data)
3188 {
3189 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3190
3191 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3192 }
3193
3194 static void
3195 sample_i_emit(
3196 const struct lp_build_tgsi_action * action,
3197 struct lp_build_tgsi_context * bld_base,
3198 struct lp_build_emit_data * emit_data)
3199 {
3200 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3201
3202 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3203 }
3204
3205 static void
3206 sample_emit(
3207 const struct lp_build_tgsi_action * action,
3208 struct lp_build_tgsi_context * bld_base,
3209 struct lp_build_emit_data * emit_data)
3210 {
3211 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3212
3213 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3214 FALSE, emit_data->output);
3215 }
3216
3217 static void
3218 sample_b_emit(
3219 const struct lp_build_tgsi_action * action,
3220 struct lp_build_tgsi_context * bld_base,
3221 struct lp_build_emit_data * emit_data)
3222 {
3223 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3224
3225 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3226 FALSE, emit_data->output);
3227 }
3228
3229 static void
3230 sample_c_emit(
3231 const struct lp_build_tgsi_action * action,
3232 struct lp_build_tgsi_context * bld_base,
3233 struct lp_build_emit_data * emit_data)
3234 {
3235 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3236
3237 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3238 TRUE, emit_data->output);
3239 }
3240
3241 static void
3242 sample_c_lz_emit(
3243 const struct lp_build_tgsi_action * action,
3244 struct lp_build_tgsi_context * bld_base,
3245 struct lp_build_emit_data * emit_data)
3246 {
3247 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3248
3249 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3250 TRUE, emit_data->output);
3251 }
3252
3253 static void
3254 sample_d_emit(
3255 const struct lp_build_tgsi_action * action,
3256 struct lp_build_tgsi_context * bld_base,
3257 struct lp_build_emit_data * emit_data)
3258 {
3259 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3260
3261 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3262 FALSE, emit_data->output);
3263 }
3264
3265 static void
3266 sample_l_emit(
3267 const struct lp_build_tgsi_action * action,
3268 struct lp_build_tgsi_context * bld_base,
3269 struct lp_build_emit_data * emit_data)
3270 {
3271 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3272
3273 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3274 FALSE, emit_data->output);
3275 }
3276
3277 static void
3278 sviewinfo_emit(
3279 const struct lp_build_tgsi_action * action,
3280 struct lp_build_tgsi_context * bld_base,
3281 struct lp_build_emit_data * emit_data)
3282 {
3283 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3284
3285 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3286 }
3287
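/*
 * Return the combined execution mask: the shader's live-fragment mask
 * ANDed with the current control-flow exec mask (if any).
 */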
3288 static LLVMValueRef
3289 mask_vec(struct lp_build_tgsi_context *bld_base)
3290 {
3291 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3292 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3293 struct lp_exec_mask *exec_mask = &bld->exec_mask;
3294
3295 if (!exec_mask->has_mask) {
3296 return lp_build_mask_value(bld->mask);
3297 }
3298 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
3299 exec_mask->exec_mask, "");
3300 }
3301
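/*
 * Add 1 to the active lanes of the vector at ptr. Mask lanes are 0 or
 * ~0 (i.e. -1), so subtracting the mask increments exactly the active
 * lanes.
 */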
3302 static void
3303 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3304 LLVMValueRef ptr,
3305 LLVMValueRef mask)
3306 {
3307 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3308 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3309
3310 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3311
3312 LLVMBuildStore(builder, current_vec, ptr);
3313 }
3314
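/*
 * Zero the active lanes of the uint vector at ptr, leaving inactive
 * lanes untouched.
 */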
3315 static void
3316 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3317 LLVMValueRef ptr,
3318 LLVMValueRef mask)
3319 {
3320 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3321 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3322
3323 current_vec = lp_build_select(&bld_base->uint_bld,
3324 mask,
3325 bld_base->uint_bld.zero,
3326 current_vec);
3327
3328 LLVMBuildStore(builder, current_vec, ptr);
3329 }
3330
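/*
 * Turn off the lanes which have already emitted the maximum number of
 * vertices declared by the geometry shader.
 */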
3331 static LLVMValueRef
3332 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3333 LLVMValueRef current_mask_vec,
3334 LLVMValueRef total_emitted_vertices_vec)
3335 {
3336 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3337 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3338 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3339 total_emitted_vertices_vec,
3340 bld->max_output_vertices_vec);
3341
3342 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3343 }
3344
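/*
 * EMIT: for the lanes that are active and still below the declared
 * maximum vertex count, hand the current outputs to the GS interface
 * and bump the per-lane emitted-vertex counters.
 */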
3345 static void
3346 emit_vertex(
3347 const struct lp_build_tgsi_action * action,
3348 struct lp_build_tgsi_context * bld_base,
3349 struct lp_build_emit_data * emit_data)
3350 {
3351 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3352 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3353
3354 if (bld->gs_iface->emit_vertex) {
3355 LLVMValueRef mask = mask_vec(bld_base);
3356 LLVMValueRef total_emitted_vertices_vec =
3357 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3358 mask = clamp_mask_to_max_output_vertices(bld, mask,
3359 total_emitted_vertices_vec);
3360 gather_outputs(bld);
3361 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
3362 bld->outputs,
3363 total_emitted_vertices_vec);
3364 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3365 mask);
3366 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3367 mask);
3368 #if DUMP_GS_EMITS
3369 lp_build_print_value(bld->bld_base.base.gallivm,
3370 " +++ emit vertex masked ones = ",
3371 mask);
3372 lp_build_print_value(bld->bld_base.base.gallivm,
3373 " +++ emit vertex emitted = ",
3374 total_emitted_vertices_vec);
3375 #endif
3376 }
3377 }
3378
3379
3380 static void
3381 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3382 LLVMValueRef mask)
3383 {
3384 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3385 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3386
3387 if (bld->gs_iface->end_primitive) {
3388 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3389 LLVMValueRef emitted_vertices_vec =
3390 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3391 LLVMValueRef emitted_prims_vec =
3392 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3393
3394 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3395 emitted_vertices_vec,
3396 uint_bld->zero);
3397 /* We need to combine the current execution mask with the mask
3398 telling us which, if any, execution slots actually have
3399 unemitted primitives; this way we make sure that end_primitive
3400 executes only on the paths that have unflushed vertices. */
3401 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
3402
3403 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
3404 emitted_vertices_vec,
3405 emitted_prims_vec);
3406
3407 #if DUMP_GS_EMITS
3408 lp_build_print_value(bld->bld_base.base.gallivm,
3409 " +++ end prim masked ones = ",
3410 mask);
3411 lp_build_print_value(bld->bld_base.base.gallivm,
3412 " +++ end prim emitted verts1 = ",
3413 emitted_vertices_vec);
3414 lp_build_print_value(bld->bld_base.base.gallivm,
3415 " +++ end prim emitted prims1 = ",
3416 LLVMBuildLoad(builder,
3417 bld->emitted_prims_vec_ptr, ""));
3418 #endif
3419 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
3420 mask);
3421 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
3422 mask);
3423 #if DUMP_GS_EMITS
3424 lp_build_print_value(bld->bld_base.base.gallivm,
3425 " +++ end prim emitted verts2 = ",
3426 LLVMBuildLoad(builder,
3427 bld->emitted_vertices_vec_ptr, ""));
3428 #endif
3429 }
3430
3431 }
3432
3433 static void
3434 end_primitive(
3435 const struct lp_build_tgsi_action * action,
3436 struct lp_build_tgsi_context * bld_base,
3437 struct lp_build_emit_data * emit_data)
3438 {
3439 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3440
3441 if (bld->gs_iface->end_primitive) {
3442 LLVMValueRef mask = mask_vec(bld_base);
3443 end_primitive_masked(bld_base, mask);
3444 }
3445 }
3446
3447 static void
3448 cal_emit(
3449 const struct lp_build_tgsi_action * action,
3450 struct lp_build_tgsi_context * bld_base,
3451 struct lp_build_emit_data * emit_data)
3452 {
3453 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3454
3455 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3456 &bld_base->pc);
3457 }
3458
3459 static void
3460 ret_emit(
3461 const struct lp_build_tgsi_action * action,
3462 struct lp_build_tgsi_context * bld_base,
3463 struct lp_build_emit_data * emit_data)
3464 {
3465 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3466
3467 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3468 }
3469
3470 static void
3471 brk_emit(
3472 const struct lp_build_tgsi_action * action,
3473 struct lp_build_tgsi_context * bld_base,
3474 struct lp_build_emit_data * emit_data)
3475 {
3476 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3477
3478 lp_exec_break(&bld->exec_mask, bld_base);
3479 }
3480
3481 static void
3482 breakc_emit(
3483 const struct lp_build_tgsi_action * action,
3484 struct lp_build_tgsi_context * bld_base,
3485 struct lp_build_emit_data * emit_data)
3486 {
3487 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3488 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3489 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3490 LLVMValueRef unsigned_cond =
3491 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
3492 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3493 unsigned_cond,
3494 uint_bld->zero);
3495
3496 lp_exec_break_condition(&bld->exec_mask, cond);
3497 }
3498
3499 static void
3500 if_emit(
3501 const struct lp_build_tgsi_action * action,
3502 struct lp_build_tgsi_context * bld_base,
3503 struct lp_build_emit_data * emit_data)
3504 {
3505 LLVMValueRef tmp;
3506 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3507
3508 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3509 emit_data->args[0], bld->bld_base.base.zero);
3510 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3511 }
3512
static void
uif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], uint_bld->zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

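/*
 * Switch statement handlers (CASE/DEFAULT/SWITCH/ENDSWITCH).  All of the
 * mask bookkeeping lives in the lp_exec_* helpers; these wrappers just
 * unpack the operands.
 */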
static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
}

static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_default(&bld->exec_mask, bld_base);
}

static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
}

static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endswitch(&bld->exec_mask, bld_base);
}

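/* TGSI_OPCODE_BGNLOOP: push a new loop frame on the execution mask. */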
static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask);
}

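/* TGSI_OPCODE_BGNSUB: start of a subroutine body; pairs with ENDSUB. */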
static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}

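/* TGSI_OPCODE_ELSE: invert the cond mask pushed by the matching IF/UIF. */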
static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}

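/*
 * ENDIF/ENDLOOP/ENDSUB/CONT simply pop or update the corresponding
 * execution-mask state.
 */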
static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}

static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}

static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}

static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}

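/*
 * Emitted once at the top of the translated function: allocate the
 * arrays backing indirectly addressed register files and, for geometry
 * shaders, the per-lane emit counters.
 */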
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                               bld_base->base.vec_type, array_size,
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them. */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_INPUT] * 4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      assert(bld_base->info->num_inputs
             <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}

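/*
 * Emitted once at the end of the translated function: flush pending GS
 * state, or gather the outputs back into the caller's slots.
 */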
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
         vertices in the cache.  Note we must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      /* If we have indirect addressing in outputs, copy our alloca array
       * back to the output slots specified by the caller. */
      gather_outputs(bld);
   }
}

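/**
 * Translate a TGSI token stream into LLVM IR, processing 'type.length'
 * shader invocations at a time in SoA fashion.  Passing a non-NULL
 * gs_iface puts the translator into geometry shader mode.
 */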
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
                  LLVMValueRef thread_data_ptr,
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * them like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons immediates are always backed by a static
    * array, but if their number is too great, we have to fall back to
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
      (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }

   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;

   if (gs_iface) {
      /* This should always be set, but apps using ext_geometry_shader4
       * quite often forget to set it, so default to MAX_VERTEX_VARYING
       * from that spec instead of asserting. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
         info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

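   /* The exec mask tracks per-lane liveness through the control flow
    * opcodes wired up above. */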
   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
   lp_exec_mask_fini(&bld.exec_mask);
}