1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "tgsi/tgsi_strings.h"
51 #include "lp_bld_tgsi_action.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_bitarit.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_init.h"
58 #include "lp_bld_logic.h"
59 #include "lp_bld_swizzle.h"
60 #include "lp_bld_flow.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_tgsi.h"
63 #include "lp_bld_limits.h"
64 #include "lp_bld_debug.h"
65 #include "lp_bld_printf.h"
66 #include "lp_bld_sample.h"
67 #include "lp_bld_struct.h"
68
69 /* SM 4.0 says that subroutines can nest 32 deep and
70 * we need one more for our main function */
71 #define LP_MAX_NUM_FUNCS 33
72
73 #define DUMP_GS_EMITS 0
74
75 /*
76 * If non-zero, the generated LLVM IR will print intermediate results for every
77 * TGSI instruction.
78 *
79 * TODO:
80 * - take execution masks into consideration
81 * - debug control-flow instructions
82 */
83 #define DEBUG_EXECUTION 0
84
85
86 /*
87 * Emit code to print a register value.
88 */
89 static void
90 emit_dump_reg(struct gallivm_state *gallivm,
91 unsigned file,
92 unsigned index,
93 unsigned chan,
94 LLVMValueRef value)
95 {
96 char buf[32];
97
98 util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
99 tgsi_file_name(file),
100 index, "xyzw"[chan]);
101
102 lp_build_print_value(gallivm, buf, value);
103 }
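/*
 * For instance (illustrative), file=TGSI_FILE_TEMPORARY, index=0, chan=2
 * produces the prefix " TEMP[0].z = ", which lp_build_print_value then
 * completes with the runtime value of that channel.
 */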
104
105 /*
106 * Return the context for the current function.
107 * (always 'main', if the shader doesn't make any function calls)
108 */
109 static INLINE struct function_ctx *
110 func_ctx(struct lp_exec_mask *mask)
111 {
112 assert(mask->function_stack_size > 0);
113 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
114 return &mask->function_stack[mask->function_stack_size - 1];
115 }
116
117 /*
118 * Returns true if we're in a loop.
119 * The check is global: it returns true even if there's no loop inside the
120 * current function, as long as some function further up the call stack was
121 * inside a loop when this one was called.
122 */
123 static INLINE boolean
124 mask_has_loop(struct lp_exec_mask *mask)
125 {
126 int i;
127 for (i = mask->function_stack_size - 1; i >= 0; --i) {
128 const struct function_ctx *ctx = &mask->function_stack[i];
129 if (ctx->loop_stack_size > 0)
130 return TRUE;
131 }
132 return FALSE;
133 }
134
135 /*
136 * Returns true if we're inside a switch statement.
137 * The check is global: it returns true even if there's no switch in the
138 * current function, as long as some function further up the call stack was
139 * inside a switch when this one was called.
140 */
141 static INLINE boolean
142 mask_has_switch(struct lp_exec_mask *mask)
143 {
144 int i;
145 for (i = mask->function_stack_size - 1; i >= 0; --i) {
146 const struct function_ctx *ctx = &mask->function_stack[i];
147 if (ctx->switch_stack_size > 0)
148 return TRUE;
149 }
150 return FALSE;
151 }
152
153 /*
154 * Returns true if we're inside a conditional.
155 * The check is global: it returns true even if there's no conditional in the
156 * current function, as long as some function further up the call stack was
157 * inside a conditional when this one was called.
158 */
159 static INLINE boolean
160 mask_has_cond(struct lp_exec_mask *mask)
161 {
162 int i;
163 for (i = mask->function_stack_size - 1; i >= 0; --i) {
164 const struct function_ctx *ctx = &mask->function_stack[i];
165 if (ctx->cond_stack_size > 0)
166 return TRUE;
167 }
168 return FALSE;
169 }
170
171
172 /*
173 * Initialize a function context at the specified index.
174 */
175 static void
176 lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
177 {
178 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
179 LLVMBuilderRef builder = mask->bld->gallivm->builder;
180 struct function_ctx *ctx = &mask->function_stack[function_idx];
181
182 ctx->cond_stack_size = 0;
183 ctx->loop_stack_size = 0;
184 ctx->switch_stack_size = 0;
185
186 if (function_idx == 0) {
187 ctx->ret_mask = mask->ret_mask;
188 }
189
190 ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
191 int_type, "looplimiter");
192 LLVMBuildStore(
193 builder,
194 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
195 ctx->loop_limiter);
196 }
197
198 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
199 {
200 mask->bld = bld;
201 mask->has_mask = FALSE;
202 mask->ret_in_main = FALSE;
203 /* For the main function */
204 mask->function_stack_size = 1;
205
206 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
207 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
208 mask->cond_mask = mask->switch_mask =
209 LLVMConstAllOnes(mask->int_vec_type);
210
211 mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
212 sizeof(mask->function_stack[0]));
213 lp_exec_mask_function_init(mask, 0);
214 }
215
216 static void
217 lp_exec_mask_fini(struct lp_exec_mask *mask)
218 {
219 FREE(mask->function_stack);
220 }
221
222 static void lp_exec_mask_update(struct lp_exec_mask *mask)
223 {
224 LLVMBuilderRef builder = mask->bld->gallivm->builder;
225 boolean has_loop_mask = mask_has_loop(mask);
226 boolean has_cond_mask = mask_has_cond(mask);
227 boolean has_switch_mask = mask_has_switch(mask);
228 boolean has_ret_mask = mask->function_stack_size > 1 ||
229 mask->ret_in_main;
230
231 if (has_loop_mask) {
232 /* for loops we need to update the entire mask at runtime */
233 LLVMValueRef tmp;
234 assert(mask->break_mask);
235 tmp = LLVMBuildAnd(builder,
236 mask->cont_mask,
237 mask->break_mask,
238 "maskcb");
239 mask->exec_mask = LLVMBuildAnd(builder,
240 mask->cond_mask,
241 tmp,
242 "maskfull");
243 } else
244 mask->exec_mask = mask->cond_mask;
245
246 if (has_switch_mask) {
247 mask->exec_mask = LLVMBuildAnd(builder,
248 mask->exec_mask,
249 mask->switch_mask,
250 "switchmask");
251 }
252
253 if (has_ret_mask) {
254 mask->exec_mask = LLVMBuildAnd(builder,
255 mask->exec_mask,
256 mask->ret_mask,
257 "callmask");
258 }
259
260 mask->has_mask = (has_cond_mask ||
261 has_loop_mask ||
262 has_switch_mask ||
263 has_ret_mask);
264 }
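/*
 * Net effect of the update above (illustrative summary): with every partial
 * mask active, each channel ends up with
 *
 *    exec_mask = cond_mask & cont_mask & break_mask & switch_mask & ret_mask
 *
 * where each partial mask is all-ones for live channels and zero for dead
 * ones.
 */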
265
266 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
267 LLVMValueRef val)
268 {
269 LLVMBuilderRef builder = mask->bld->gallivm->builder;
270 struct function_ctx *ctx = func_ctx(mask);
271
272 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
273 ctx->cond_stack_size++;
274 return;
275 }
276 if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
277 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
278 }
279 ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
280 assert(LLVMTypeOf(val) == mask->int_vec_type);
281 mask->cond_mask = LLVMBuildAnd(builder,
282 mask->cond_mask,
283 val,
284 "");
285 lp_exec_mask_update(mask);
286 }
287
288 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
289 {
290 LLVMBuilderRef builder = mask->bld->gallivm->builder;
291 struct function_ctx *ctx = func_ctx(mask);
292 LLVMValueRef prev_mask;
293 LLVMValueRef inv_mask;
294
295 assert(ctx->cond_stack_size);
296 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
297 return;
298 prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
299 if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
300 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
301 }
302
303 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
304
305 mask->cond_mask = LLVMBuildAnd(builder,
306 inv_mask,
307 prev_mask, "");
308 lp_exec_mask_update(mask);
309 }
310
311 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
312 {
313 struct function_ctx *ctx = func_ctx(mask);
314 assert(ctx->cond_stack_size);
315 --ctx->cond_stack_size;
316 if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
317 return;
318 mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
319 lp_exec_mask_update(mask);
320 }
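/*
 * Sketch of how a TGSI conditional drives the three helpers above
 * (illustrative only -- variable names are hypothetical, but this mirrors
 * what the IF/ELSE/ENDIF actions in this file do):
 */
#if 0
   lp_exec_mask_cond_push(&bld->exec_mask, cond); /* IF: cond_mask &= cond */
   /* ... "then" side runs with the narrowed mask ... */
   lp_exec_mask_cond_invert(&bld->exec_mask);     /* ELSE: ~cond_mask & prev */
   /* ... "else" side ... */
   lp_exec_mask_cond_pop(&bld->exec_mask);        /* ENDIF: restore prev mask */
#endif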
321
322 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
323 {
324 LLVMBuilderRef builder = mask->bld->gallivm->builder;
325 struct function_ctx *ctx = func_ctx(mask);
326
327 if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
328 ++ctx->loop_stack_size;
329 return;
330 }
331
332 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
333 ctx->break_type;
334 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
335
336 ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
337 ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
338 ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
339 ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
340 ++ctx->loop_stack_size;
341
342 ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
343 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
344
345 ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
346
347 LLVMBuildBr(builder, ctx->loop_block);
348 LLVMPositionBuilderAtEnd(builder, ctx->loop_block);
349
350 mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");
351
352 lp_exec_mask_update(mask);
353 }
354
355 static void lp_exec_break(struct lp_exec_mask *mask,
356 struct lp_build_tgsi_context * bld_base)
357 {
358 LLVMBuilderRef builder = mask->bld->gallivm->builder;
359 struct function_ctx *ctx = func_ctx(mask);
360
361 if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
362 LLVMValueRef exec_mask = LLVMBuildNot(builder,
363 mask->exec_mask,
364 "break");
365
366 mask->break_mask = LLVMBuildAnd(builder,
367 mask->break_mask,
368 exec_mask, "break_full");
369 }
370 else {
371 unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
372 boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
373 opcode == TGSI_OPCODE_CASE);
374
375
376 if (ctx->switch_in_default) {
377 /*
378 * stop default execution, but only if this is an unconditional break.
379 * (The condition here is not perfect since dead code after break is
380 * allowed but should be sufficient since false negatives are just
381 * unoptimized - so we don't have to pre-evaluate that).
382 */
383 if (break_always && ctx->switch_pc) {
384 bld_base->pc = ctx->switch_pc;
385 return;
386 }
387 }
388
389 if (break_always) {
390 mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
391 }
392 else {
393 LLVMValueRef exec_mask = LLVMBuildNot(builder,
394 mask->exec_mask,
395 "break");
396 mask->switch_mask = LLVMBuildAnd(builder,
397 mask->switch_mask,
398 exec_mask, "break_switch");
399 }
400 }
401
402 lp_exec_mask_update(mask);
403 }
404
405 static void lp_exec_break_condition(struct lp_exec_mask *mask,
406 LLVMValueRef cond)
407 {
408 LLVMBuilderRef builder = mask->bld->gallivm->builder;
409 struct function_ctx *ctx = func_ctx(mask);
410 LLVMValueRef cond_mask = LLVMBuildAnd(builder,
411 mask->exec_mask,
412 cond, "cond_mask");
413 cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
414
415 if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
416 mask->break_mask = LLVMBuildAnd(builder,
417 mask->break_mask,
418 cond_mask, "breakc_full");
419 }
420 else {
421 mask->switch_mask = LLVMBuildAnd(builder,
422 mask->switch_mask,
423 cond_mask, "breakc_switch");
424 }
425
426 lp_exec_mask_update(mask);
427 }
428
429 static void lp_exec_continue(struct lp_exec_mask *mask)
430 {
431 LLVMBuilderRef builder = mask->bld->gallivm->builder;
432 LLVMValueRef exec_mask = LLVMBuildNot(builder,
433 mask->exec_mask,
434 "");
435
436 mask->cont_mask = LLVMBuildAnd(builder,
437 mask->cont_mask,
438 exec_mask, "");
439
440 lp_exec_mask_update(mask);
441 }
442
443
444 static void lp_exec_endloop(struct gallivm_state *gallivm,
445 struct lp_exec_mask *mask)
446 {
447 LLVMBuilderRef builder = mask->bld->gallivm->builder;
448 struct function_ctx *ctx = func_ctx(mask);
449 LLVMBasicBlockRef endloop;
450 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
451 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
452 mask->bld->type.width *
453 mask->bld->type.length);
454 LLVMValueRef i1cond, i2cond, icond, limiter;
455
456 assert(mask->break_mask);
457
458
459 assert(ctx->loop_stack_size);
460 if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
461 --ctx->loop_stack_size;
462 return;
463 }
464
465 /*
466 * Restore the cont_mask, but don't pop
467 */
468 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
469 lp_exec_mask_update(mask);
470
471 /*
472 * Unlike the continue mask, the break_mask must be preserved across loop
473 * iterations
474 */
475 LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
476
477 /* Decrement the loop limiter */
478 limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");
479
480 limiter = LLVMBuildSub(
481 builder,
482 limiter,
483 LLVMConstInt(int_type, 1, false),
484 "");
485
486 LLVMBuildStore(builder, limiter, ctx->loop_limiter);
487
488 /* i1cond = (mask != 0) */
489 i1cond = LLVMBuildICmp(
490 builder,
491 LLVMIntNE,
492 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
493 LLVMConstNull(reg_type), "i1cond");
494
495 /* i2cond = (looplimiter > 0) */
496 i2cond = LLVMBuildICmp(
497 builder,
498 LLVMIntSGT,
499 limiter,
500 LLVMConstNull(int_type), "i2cond");
501
502 /* if( i1cond && i2cond ) */
503 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
504
505 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
506
507 LLVMBuildCondBr(builder,
508 icond, ctx->loop_block, endloop);
509
510 LLVMPositionBuilderAtEnd(builder, endloop);
511
512 assert(ctx->loop_stack_size);
513 --ctx->loop_stack_size;
514 mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
515 mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
516 ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
517 ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
518 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
519 ctx->switch_stack_size];
520
521 lp_exec_mask_update(mask);
522 }
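/*
 * Shape of the code emitted by bgnloop/endloop above (illustrative sketch):
 *
 *       store break_mask -> break_var
 *       br bgnloop
 *    bgnloop:
 *       break_mask = load break_var
 *       ... body; BRK/CONT merely clear lanes in break_mask/cont_mask ...
 *       store break_mask -> break_var
 *       limiter = limiter - 1
 *       if (exec_mask != 0 && limiter > 0) br bgnloop else br endloop
 *    endloop:
 */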
523
524 static void lp_exec_switch(struct lp_exec_mask *mask,
525 LLVMValueRef switchval)
526 {
527 struct function_ctx *ctx = func_ctx(mask);
528
529 if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
530 ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
531 ctx->switch_stack_size++;
532 return;
533 }
534
535 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
536 ctx->break_type;
537 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
538
539 ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
540 ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
541 ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
542 ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
543 ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
544 ctx->switch_stack_size++;
545
546 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
547 ctx->switch_val = switchval;
548 ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
549 ctx->switch_in_default = false;
550 ctx->switch_pc = 0;
551
552 lp_exec_mask_update(mask);
553 }
554
555 static void lp_exec_endswitch(struct lp_exec_mask *mask,
556 struct lp_build_tgsi_context * bld_base)
557 {
558 LLVMBuilderRef builder = mask->bld->gallivm->builder;
559 struct function_ctx *ctx = func_ctx(mask);
560
561 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
562 ctx->switch_stack_size--;
563 return;
564 }
565
566 /* check if there's a deferred default; if so, do it now */
567 if (ctx->switch_pc && !ctx->switch_in_default) {
568 LLVMValueRef prevmask, defaultmask;
569 unsigned tmp_pc;
570 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
571 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
572 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
573 ctx->switch_in_default = true;
574
575 lp_exec_mask_update(mask);
576
577 assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
578 TGSI_OPCODE_DEFAULT);
579
580 tmp_pc = bld_base->pc;
581 bld_base->pc = ctx->switch_pc;
582 /*
583 * re-purpose switch_pc to point to here again, since we stop execution of
584 * the deferred default after the next break.
585 */
586 ctx->switch_pc = tmp_pc - 1;
587
588 return;
589 }
590
591 else if (ctx->switch_pc && ctx->switch_in_default) {
592 assert(bld_base->pc == ctx->switch_pc + 1);
593 }
594
595 ctx->switch_stack_size--;
596 mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
597 ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
598 ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
599 ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
600 ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
601
602 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
603
604 lp_exec_mask_update(mask);
605 }
606
607 static void lp_exec_case(struct lp_exec_mask *mask,
608 LLVMValueRef caseval)
609 {
610 LLVMBuilderRef builder = mask->bld->gallivm->builder;
611 struct function_ctx *ctx = func_ctx(mask);
612
613 LLVMValueRef casemask, prevmask;
614
615 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
616 return;
617 }
618
619 /* skipping case mask evaluation while in default is NOT optional (at least not in all cases). */
620 if (!ctx->switch_in_default) {
621 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
622 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
623 ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
624 ctx->switch_mask_default, "sw_default_mask");
625 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
626 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
627
628 lp_exec_mask_update(mask);
629 }
630 }
631
632 /*
633 * Analyse default statement in a switch.
634 * \return true if default is last statement, false otherwise
635 * \param default_pc_start contains pc of instruction to jump to
636 * if default wasn't last but there's no
637 * fallthrough into default.
638 */
639 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
640 struct lp_build_tgsi_context * bld_base,
641 int *default_pc_start)
642 {
643 unsigned pc = bld_base->pc;
644 struct function_ctx *ctx = func_ctx(mask);
645 unsigned curr_switch_stack = ctx->switch_stack_size;
646
647 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
648 return false;
649 }
650
651 /* skip over case statements which are together with default */
652 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
653 pc++;
654 }
655
656 while (pc != -1 && pc < bld_base->num_instructions) {
657 unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
658 switch (opcode) {
659 case TGSI_OPCODE_CASE:
660 if (curr_switch_stack == ctx->switch_stack_size) {
661 *default_pc_start = pc - 1;
662 return false;
663 }
664 break;
665 case TGSI_OPCODE_SWITCH:
666 curr_switch_stack++;
667 break;
668 case TGSI_OPCODE_ENDSWITCH:
669 if (curr_switch_stack == ctx->switch_stack_size) {
670 *default_pc_start = pc - 1;
671 return true;
672 }
673 curr_switch_stack--;
674 break;
675 }
676 pc++;
677 }
678 /* should never arrive here */
679 assert(0);
680 return true;
681 }
682
683 static void lp_exec_default(struct lp_exec_mask *mask,
684 struct lp_build_tgsi_context * bld_base)
685 {
686 LLVMBuilderRef builder = mask->bld->gallivm->builder;
687 struct function_ctx *ctx = func_ctx(mask);
688
689 int default_exec_pc;
690 boolean default_is_last;
691
692 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
693 return;
694 }
695
696 /*
697 * This is a messy opcode, because it may not always be at the end and
698 * there can be fallthrough in and out of it.
699 */
700
701 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
702 /*
703 * If it is last statement in switch (note that case statements appearing
704 * "at the same time" as default don't change that) everything is just fine,
705 * update switch mask and go on. This means we can handle default with
706 * fallthrough INTO it without overhead, if it is last.
707 */
708 if (default_is_last) {
709 LLVMValueRef prevmask, defaultmask;
710 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
711 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
712 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
713 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
714 ctx->switch_in_default = true;
715
716 lp_exec_mask_update(mask);
717 }
718 else {
719 /*
720 * Technically, "case" immediately before default isn't really a
721 * fallthrough, however we still have to count them as such as we
722 * already have updated the masks.
723 * If that happens in practice, we could add a switch optimizer pass
724 * which just gets rid of all case statements appearing together with
725 * default (or could do switch analysis at switch start time instead).
726 */
727 unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
728 boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
729 opcode != TGSI_OPCODE_SWITCH);
730 /*
731 * If it is not last statement and there was no fallthrough into it,
732 * we record the PC and continue execution at the next case (again, cases
733 * encountered at the same time don't count). At endswitch
734 * time, we update switchmask, and go back executing the code we skipped
735 * until the next break (possibly re-executing some code with changed mask
736 * if there was a fallthrough out of default).
737 * Finally, if it is not last statement and there was a fallthrough into it,
738 * do the same as with the former case, except instead of skipping the code
739 * just execute it without updating the mask, then go back and re-execute.
740 */
741 ctx->switch_pc = bld_base->pc;
742 if (!ft_into) {
743 bld_base->pc = default_exec_pc;
744 }
745 }
746 }
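/*
 * Worked example of the deferred-default handling (illustrative):
 *
 *    SWITCH s
 *    CASE 0 ... BRK     <- case mask evaluated normally
 *    DEFAULT            <- not last, no fallthrough in: record switch_pc,
 *    ... BRK               skip ahead to the next CASE for now
 *    CASE 1 ... BRK
 *    ENDSWITCH          <- default mask now known: jump back to switch_pc,
 *                          re-execute the default body, and stop again at
 *                          its unconditional BRK
 */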
747
748
749 /* stores val into an address pointed to by dst_ptr.
750 * mask->exec_mask is used to figure out which bits of val
751 * should be stored into the address
752 * (0 means don't store this bit, 1 means do store).
753 */
754 static void lp_exec_mask_store(struct lp_exec_mask *mask,
755 struct lp_build_context *bld_store,
756 LLVMValueRef pred,
757 LLVMValueRef val,
758 LLVMValueRef dst_ptr)
759 {
760 LLVMBuilderRef builder = mask->bld->gallivm->builder;
761
762 assert(lp_check_value(bld_store->type, val));
763 assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
764 assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
765
766 /* Mix the predicate and execution mask */
767 if (mask->has_mask) {
768 if (pred) {
769 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
770 } else {
771 pred = mask->exec_mask;
772 }
773 }
774
775 if (pred) {
776 LLVMValueRef res, dst;
777
778 dst = LLVMBuildLoad(builder, dst_ptr, "");
779 res = lp_build_select(bld_store, pred, val, dst);
780 LLVMBuildStore(builder, res, dst_ptr);
781 } else
782 LLVMBuildStore(builder, val, dst_ptr);
783 }
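/*
 * In effect (sketch): *dst_ptr = select(pred, val, *dst_ptr), i.e. a
 * read-modify-write that leaves masked-off lanes untouched.
 */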
784
785 static void lp_exec_mask_call(struct lp_exec_mask *mask,
786 int func,
787 int *pc)
788 {
789 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
790 return;
791 }
792
793 lp_exec_mask_function_init(mask, mask->function_stack_size);
794 mask->function_stack[mask->function_stack_size].pc = *pc;
795 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
796 mask->function_stack_size++;
797 *pc = func;
798 }
799
800 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
801 {
802 LLVMBuilderRef builder = mask->bld->gallivm->builder;
803 struct function_ctx *ctx = func_ctx(mask);
804 LLVMValueRef exec_mask;
805
806 if (ctx->cond_stack_size == 0 &&
807 ctx->loop_stack_size == 0 &&
808 ctx->switch_stack_size == 0 &&
809 mask->function_stack_size == 1) {
810 /* returning from main() */
811 *pc = -1;
812 return;
813 }
814
815 if (mask->function_stack_size == 1) {
816 /*
817 * This requires special handling since we need to ensure
818 * we don't drop the mask even if we have no call stack
819 * (e.g. after a ret in an if clause after the endif)
820 */
821 mask->ret_in_main = TRUE;
822 }
823
824 exec_mask = LLVMBuildNot(builder,
825 mask->exec_mask,
826 "ret");
827
828 mask->ret_mask = LLVMBuildAnd(builder,
829 mask->ret_mask,
830 exec_mask, "ret_full");
831
832 lp_exec_mask_update(mask);
833 }
834
835 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
836 {
837 }
838
839 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
840 {
841 struct function_ctx *ctx;
842
843 assert(mask->function_stack_size > 1);
844 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
845
846 ctx = func_ctx(mask);
847 mask->function_stack_size--;
848
849 *pc = ctx->pc;
850 mask->ret_mask = ctx->ret_mask;
851
852 lp_exec_mask_update(mask);
853 }
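/*
 * Sketch of how TGSI subroutine opcodes drive these helpers (illustrative
 * only -- 'func' and 'pc' are hypothetical locals, but this mirrors the
 * CAL/RET/ENDSUB actions in this file):
 */
#if 0
   lp_exec_mask_call(&bld->exec_mask, func, &pc);  /* CAL: push pc, jump */
   lp_exec_mask_ret(&bld->exec_mask, &pc);         /* RET: clear returning
                                                    * lanes (or end main) */
   lp_exec_mask_endsub(&bld->exec_mask, &pc);      /* ENDSUB: pop pc and
                                                    * restore ret_mask */
#endif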
854
855
856 static LLVMValueRef
857 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
858 unsigned file,
859 unsigned index,
860 unsigned chan)
861 {
862 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
863 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
864 LLVMValueRef var_of_array;
865
866 switch (file) {
867 case TGSI_FILE_TEMPORARY:
868 array_of_vars = bld->temps;
869 var_of_array = bld->temps_array;
870 break;
871 case TGSI_FILE_OUTPUT:
872 array_of_vars = bld->outputs;
873 var_of_array = bld->outputs_array;
874 break;
875 default:
876 assert(0);
877 return NULL;
878 }
879
880 assert(chan < 4);
881
882 if (bld->indirect_files & (1 << file)) {
883 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
884 return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
885 }
886 else {
887 assert(index <= bld->bld_base.info->file_max[file]);
888 return array_of_vars[index][chan];
889 }
890 }
891
892
893 /**
894 * Return pointer to a temporary register channel (src or dest).
895 * Note that indirect addressing cannot be handled here.
896 * \param index which temporary register
897 * \param chan which channel of the temp register.
898 */
899 LLVMValueRef
900 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
901 unsigned index,
902 unsigned chan)
903 {
904 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
905 }
906
907 /**
908 * Return pointer to an output register channel (src or dest).
909 * Note that indirect addressing cannot be handled here.
910 * \param index which output register
911 * \param chan which channel of the output register.
912 */
913 LLVMValueRef
914 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
915 unsigned index,
916 unsigned chan)
917 {
918 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
919 }
920
921 /*
922 * If we have indirect addressing in outputs, copy our alloca array
923 * to the output slots specified by the caller to make sure
924 * our outputs are delivered consistently via the same interface.
925 */
926 static void
927 gather_outputs(struct lp_build_tgsi_soa_context * bld)
928 {
929 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
930 unsigned index, chan;
931 assert(bld->bld_base.info->num_outputs <=
932 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
933 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
934 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
935 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
936 }
937 }
938 }
939 }
940
941 /**
942 * Gather vector.
943 * XXX the lp_build_gather() function should be capable of doing this
944 * with a little work.
945 */
946 static LLVMValueRef
947 build_gather(struct lp_build_tgsi_context *bld_base,
948 LLVMValueRef base_ptr,
949 LLVMValueRef indexes,
950 LLVMValueRef overflow_mask)
951 {
952 struct gallivm_state *gallivm = bld_base->base.gallivm;
953 LLVMBuilderRef builder = gallivm->builder;
954 struct lp_build_context *uint_bld = &bld_base->uint_bld;
955 struct lp_build_context *bld = &bld_base->base;
956 LLVMValueRef res = bld->undef;
957 unsigned i;
958
959 /*
960 * overflow_mask is a vector telling us which channels
961 * in the vector overflowed. We use the overflow behavior for
962 * constant buffers which is defined as:
963 * Out of bounds access to constant buffer returns 0 in all
964 * components. Out of bounds behavior is always with respect
965 * to the size of the buffer bound at that slot.
966 */
967
968 if (overflow_mask) {
969 /*
970 * We avoid per-element control flow here (also due to llvm going crazy,
971 * though I suspect it's better anyway since overflow is likely rare).
972 * Note that since we still fetch from buffers even if num_elements was
973 * zero (in this case we'll fetch from index zero) the jit func callers
974 * MUST provide valid fake constant buffers of size 4x32 (the values do
975 * not matter), otherwise we'd still need (not per element though)
976 * control flow.
977 */
978 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
979 }
980
981 /*
982 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
983 */
984 for (i = 0; i < bld->type.length; i++) {
985 LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
986 LLVMValueRef index = LLVMBuildExtractElement(builder,
987 indexes, ii, "");
988 LLVMValueRef scalar_ptr, scalar;
989
990 scalar_ptr = LLVMBuildGEP(builder, base_ptr,
991 &index, 1, "gather_ptr");
992 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
993
994 res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
995 }
996
997 if (overflow_mask) {
998 res = lp_build_select(bld, overflow_mask, bld->zero, res);
999 }
1000
1001 return res;
1002 }
1003
1004
1005 /**
1006 * Scatter/store vector.
1007 */
1008 static void
1009 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
1010 LLVMValueRef base_ptr,
1011 LLVMValueRef indexes,
1012 LLVMValueRef values,
1013 struct lp_exec_mask *mask,
1014 LLVMValueRef pred)
1015 {
1016 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1017 LLVMBuilderRef builder = gallivm->builder;
1018 unsigned i;
1019
1020 /* Mix the predicate and execution mask */
1021 if (mask->has_mask) {
1022 if (pred) {
1023 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
1024 }
1025 else {
1026 pred = mask->exec_mask;
1027 }
1028 }
1029
1030 /*
1031 * Loop over elements of index_vec, store scalar value.
1032 */
1033 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1034 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1035 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
1036 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
1037 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
1038 LLVMValueRef scalar_pred = pred ?
1039 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
1040
1041 if (0)
1042 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
1043 ii, val, index, scalar_ptr);
1044
1045 if (scalar_pred) {
1046 LLVMValueRef real_val, dst_val;
1047 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
1048 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
1049 LLVMBuildStore(builder, real_val, scalar_ptr);
1050 }
1051 else {
1052 LLVMBuildStore(builder, val, scalar_ptr);
1053 }
1054 }
1055 }
1056
1057
1058 /**
1059 * Read the current value of the ADDR register, convert the floats to
1060 * ints, add the base index and return the vector of offsets.
1061 * The offsets will be used to index into the constant buffer or
1062 * temporary register file.
1063 */
1064 static LLVMValueRef
1065 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
1066 unsigned reg_file, unsigned reg_index,
1067 const struct tgsi_ind_register *indirect_reg)
1068 {
1069 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1070 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
1071 /* use the component of the indirect register selected by its swizzle (usually X) */
1072 unsigned swizzle = indirect_reg->Swizzle;
1073 LLVMValueRef base;
1074 LLVMValueRef rel;
1075 LLVMValueRef max_index;
1076 LLVMValueRef index;
1077
1078 assert(bld->indirect_files & (1 << reg_file));
1079
1080 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
1081
1082 assert(swizzle < 4);
1083 switch (indirect_reg->File) {
1084 case TGSI_FILE_ADDRESS:
1085 rel = LLVMBuildLoad(builder,
1086 bld->addr[indirect_reg->Index][swizzle],
1087 "load addr reg");
1088 /* ADDR LLVM values already have LLVM integer type. */
1089 break;
1090 case TGSI_FILE_TEMPORARY:
1091 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
1092 rel = LLVMBuildLoad(builder, rel, "load temp reg");
1093 /* TEMP LLVM values always have LLVM float type, but for indirection, the
1094 * value actually stored is expected to be an integer */
1095 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
1096 break;
1097 default:
1098 assert(0);
1099 rel = uint_bld->zero;
1100 }
1101
1102 index = lp_build_add(uint_bld, base, rel);
1103
1104 /*
1105 * emit_fetch_constant handles constant buffer overflow so this code
1106 * is pointless for them.
1107 * Furthermore the D3D10 spec in section 6.5 says:
1108 * If the constant buffer bound to a slot is larger than the size
1109 * declared in the shader for that slot, implementations are allowed
1110 * to return incorrect data (not necessarily 0) for indices that are
1111 * larger than the declared size but smaller than the buffer size.
1112 */
1113 if (reg_file != TGSI_FILE_CONSTANT) {
1114 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
1115 uint_bld->type,
1116 bld->bld_base.info->file_max[reg_file]);
1117
1118 assert(!uint_bld->type.sign);
1119 index = lp_build_min(uint_bld, index, max_index);
1120 }
1121
1122 return index;
1123 }
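/*
 * Worked example (illustrative): for a source like TEMP[ADDR[0].x + 2],
 * reg_index is 2 and rel holds the per-channel ADDR[0].x values, so
 * ADDR[0].x = {0, 1, 0, 3} yields index = {2, 3, 2, 5}, clamped against
 * the highest declared TEMP register.
 */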
1124
1125 static struct lp_build_context *
1126 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1127 enum tgsi_opcode_type stype)
1128 {
1129 struct lp_build_context *bld_fetch;
1130
1131 switch (stype) {
1132 case TGSI_TYPE_FLOAT:
1133 case TGSI_TYPE_UNTYPED:
1134 bld_fetch = &bld_base->base;
1135 break;
1136 case TGSI_TYPE_UNSIGNED:
1137 bld_fetch = &bld_base->uint_bld;
1138 break;
1139 case TGSI_TYPE_SIGNED:
1140 bld_fetch = &bld_base->int_bld;
1141 break;
1142 case TGSI_TYPE_VOID:
1143 case TGSI_TYPE_DOUBLE:
1144 default:
1145 assert(0);
1146 bld_fetch = NULL;
1147 break;
1148 }
1149 return bld_fetch;
1150 }
1151
1152 static LLVMValueRef
1153 get_soa_array_offsets(struct lp_build_context *uint_bld,
1154 LLVMValueRef indirect_index,
1155 unsigned chan_index,
1156 boolean need_perelement_offset)
1157 {
1158 struct gallivm_state *gallivm = uint_bld->gallivm;
1159 LLVMValueRef chan_vec =
1160 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1161 LLVMValueRef length_vec =
1162 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1163 LLVMValueRef index_vec;
1164
1165 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1166 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1167 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1168 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1169
1170 if (need_perelement_offset) {
1171 LLVMValueRef pixel_offsets;
1172 int i;
1173 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1174 pixel_offsets = uint_bld->undef;
1175 for (i = 0; i < uint_bld->type.length; i++) {
1176 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1177 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1178 ii, ii, "");
1179 }
1180 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1181 }
1182 return index_vec;
1183 }
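/*
 * Worked example (illustrative): with a vector length of 4,
 * indirect_index = {2,2,2,2} and chan_index = 1 give
 * (2*4 + 1) * 4 = {36,36,36,36}; adding the per-element offsets {0,1,2,3}
 * yields {36,37,38,39} -- the four lanes of TEMP[2].y in the flattened
 * SoA array.
 */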
1184
1185 static LLVMValueRef
1186 emit_fetch_constant(
1187 struct lp_build_tgsi_context * bld_base,
1188 const struct tgsi_full_src_register * reg,
1189 enum tgsi_opcode_type stype,
1190 unsigned swizzle)
1191 {
1192 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1193 struct gallivm_state *gallivm = bld_base->base.gallivm;
1194 LLVMBuilderRef builder = gallivm->builder;
1195 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1196 unsigned dimension = 0;
1197 LLVMValueRef consts_ptr;
1198 LLVMValueRef num_consts;
1199 LLVMValueRef res;
1200
1201 /* XXX: Handle fetching xyzw components as a vector */
1202 assert(swizzle != ~0);
1203
1204 if (reg->Register.Dimension) {
1205 assert(!reg->Dimension.Indirect);
1206 dimension = reg->Dimension.Index;
1207 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
1208 }
1209
1210 consts_ptr = bld->consts[dimension];
1211 num_consts = bld->consts_sizes[dimension];
1212
1213 if (reg->Register.Indirect) {
1214 LLVMValueRef indirect_index;
1215 LLVMValueRef swizzle_vec =
1216 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1217 LLVMValueRef index_vec; /* index into the const buffer */
1218 LLVMValueRef overflow_mask;
1219
1220 indirect_index = get_indirect_index(bld,
1221 reg->Register.File,
1222 reg->Register.Index,
1223 &reg->Indirect);
1224
1225 /* All fetches are from the same constant buffer, so
1226 * we need to propagate the size to a vector to do a
1227 * vector comparison */
1228 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1229 /* Construct a boolean vector telling us which channels
1230 * overflow the bound constant buffer */
1231 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1232 indirect_index, num_consts);
1233
1234 /* index_vec = indirect_index * 4 + swizzle */
1235 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1236 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1237
1238 /* Gather values from the constant buffer */
1239 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask);
1240 }
1241 else {
1242 LLVMValueRef index; /* index into the const buffer */
1243 LLVMValueRef scalar, scalar_ptr;
1244
1245 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1246
1247 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
1248 &index, 1, "");
1249 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1250 res = lp_build_broadcast_scalar(&bld_base->base, scalar);
1251 }
1252
1253 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
1254 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1255 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1256 }
1257
1258 return res;
1259 }
1260
1261 static LLVMValueRef
1262 emit_fetch_immediate(
1263 struct lp_build_tgsi_context * bld_base,
1264 const struct tgsi_full_src_register * reg,
1265 enum tgsi_opcode_type stype,
1266 unsigned swizzle)
1267 {
1268 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1269 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1270 LLVMBuilderRef builder = gallivm->builder;
1271 LLVMValueRef res = NULL;
1272
1273 if (bld->use_immediates_array || reg->Register.Indirect) {
1274 LLVMValueRef imms_array;
1275 LLVMTypeRef fptr_type;
1276
1277 /* cast imms_array pointer to float* */
1278 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1279 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
1280
1281 if (reg->Register.Indirect) {
1282 LLVMValueRef indirect_index;
1283 LLVMValueRef index_vec; /* index into the immediate register array */
1284
1285 indirect_index = get_indirect_index(bld,
1286 reg->Register.File,
1287 reg->Register.Index,
1288 &reg->Indirect);
1289 /*
1290 * Unlike for other reg classes, adding pixel offsets is unnecessary -
1291 * immediates are stored as full vectors (FIXME??? - might be better
1292 * to store them the same as constants) but all elements are the same
1293 * in any case.
1294 */
1295 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1296 indirect_index,
1297 swizzle,
1298 FALSE);
1299
1300 /* Gather values from the immediate register array */
1301 res = build_gather(bld_base, imms_array, index_vec, NULL);
1302 } else {
1303 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1304 reg->Register.Index * 4 + swizzle);
1305 LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1306 bld->imms_array, &lindex, 1, "");
1307 res = LLVMBuildLoad(builder, imms_ptr, "");
1308 }
1309 }
1310 else {
1311 res = bld->immediates[reg->Register.Index][swizzle];
1312 }
1313
1314 if (stype == TGSI_TYPE_UNSIGNED) {
1315 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1316 } else if (stype == TGSI_TYPE_SIGNED) {
1317 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1318 }
1319 return res;
1320 }
1321
1322 static LLVMValueRef
1323 emit_fetch_input(
1324 struct lp_build_tgsi_context * bld_base,
1325 const struct tgsi_full_src_register * reg,
1326 enum tgsi_opcode_type stype,
1327 unsigned swizzle)
1328 {
1329 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1330 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1331 LLVMBuilderRef builder = gallivm->builder;
1332 LLVMValueRef res;
1333
1334 if (reg->Register.Indirect) {
1335 LLVMValueRef indirect_index;
1336 LLVMValueRef index_vec; /* index into the input reg array */
1337 LLVMValueRef inputs_array;
1338 LLVMTypeRef fptr_type;
1339
1340 indirect_index = get_indirect_index(bld,
1341 reg->Register.File,
1342 reg->Register.Index,
1343 &reg->Indirect);
1344
1345 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1346 indirect_index,
1347 swizzle,
1348 TRUE);
1349
1350 /* cast inputs_array pointer to float* */
1351 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1352 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1353
1354 /* Gather values from the input register array */
1355 res = build_gather(bld_base, inputs_array, index_vec, NULL);
1356 } else {
1357 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1358 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1359 reg->Register.Index * 4 + swizzle);
1360 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1361 bld->inputs_array, &lindex, 1, "");
1362 res = LLVMBuildLoad(builder, input_ptr, "");
1363 }
1364 else {
1365 res = bld->inputs[reg->Register.Index][swizzle];
1366 }
1367 }
1368
1369 assert(res);
1370
1371 if (stype == TGSI_TYPE_UNSIGNED) {
1372 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1373 } else if (stype == TGSI_TYPE_SIGNED) {
1374 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1375 }
1376
1377 return res;
1378 }
1379
1380
1381 static LLVMValueRef
1382 emit_fetch_gs_input(
1383 struct lp_build_tgsi_context * bld_base,
1384 const struct tgsi_full_src_register * reg,
1385 enum tgsi_opcode_type stype,
1386 unsigned swizzle)
1387 {
1388 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1389 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1390 const struct tgsi_shader_info *info = bld->bld_base.info;
1391 LLVMBuilderRef builder = gallivm->builder;
1392 LLVMValueRef attrib_index = NULL;
1393 LLVMValueRef vertex_index = NULL;
1394 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1395 LLVMValueRef res;
1396
1397 if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1398 /* This is really a system value, not a regular input */
1399 assert(!reg->Register.Indirect);
1400 assert(!reg->Dimension.Indirect);
1401 res = bld->system_values.prim_id;
1402 if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1403 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1404 }
1405 return res;
1406 }
1407
1408 if (reg->Register.Indirect) {
1409 attrib_index = get_indirect_index(bld,
1410 reg->Register.File,
1411 reg->Register.Index,
1412 &reg->Indirect);
1413 } else {
1414 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1415 }
1416
1417 if (reg->Dimension.Indirect) {
1418 vertex_index = get_indirect_index(bld,
1419 reg->Register.File,
1420 reg->Dimension.Index,
1421 &reg->DimIndirect);
1422 } else {
1423 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1424 }
1425
1426 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1427 reg->Dimension.Indirect,
1428 vertex_index,
1429 reg->Register.Indirect,
1430 attrib_index,
1431 swizzle_index);
1432
1433 assert(res);
1434
1435 if (stype == TGSI_TYPE_UNSIGNED) {
1436 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1437 } else if (stype == TGSI_TYPE_SIGNED) {
1438 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1439 }
1440
1441 return res;
1442 }
1443
1444 static LLVMValueRef
1445 emit_fetch_temporary(
1446 struct lp_build_tgsi_context * bld_base,
1447 const struct tgsi_full_src_register * reg,
1448 enum tgsi_opcode_type stype,
1449 unsigned swizzle)
1450 {
1451 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1452 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1453 LLVMBuilderRef builder = gallivm->builder;
1454 LLVMValueRef res;
1455
1456 if (reg->Register.Indirect) {
1457 LLVMValueRef indirect_index;
1458 LLVMValueRef index_vec; /* index into the temp reg array */
1459 LLVMValueRef temps_array;
1460 LLVMTypeRef fptr_type;
1461
1462 indirect_index = get_indirect_index(bld,
1463 reg->Register.File,
1464 reg->Register.Index,
1465 &reg->Indirect);
1466
1467 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1468 indirect_index,
1469 swizzle,
1470 TRUE);
1471
1472 /* cast temps_array pointer to float* */
1473 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1474 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1475
1476 /* Gather values from the temporary register array */
1477 res = build_gather(bld_base, temps_array, index_vec, NULL);
1478 }
1479 else {
1480 LLVMValueRef temp_ptr;
1481 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1482 res = LLVMBuildLoad(builder, temp_ptr, "");
1483 }
1484
1485 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
1486 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1487 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1488 }
1489
1490 return res;
1491 }
1492
1493 static LLVMValueRef
1494 emit_fetch_system_value(
1495 struct lp_build_tgsi_context * bld_base,
1496 const struct tgsi_full_src_register * reg,
1497 enum tgsi_opcode_type stype,
1498 unsigned swizzle)
1499 {
1500 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1501 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1502 const struct tgsi_shader_info *info = bld->bld_base.info;
1503 LLVMBuilderRef builder = gallivm->builder;
1504 LLVMValueRef res;
1505 enum tgsi_opcode_type atype; /* Actual type of the value */
1506
1507 assert(!reg->Register.Indirect);
1508
1509 switch (info->system_value_semantic_name[reg->Register.Index]) {
1510 case TGSI_SEMANTIC_INSTANCEID:
1511 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1512 atype = TGSI_TYPE_UNSIGNED;
1513 break;
1514
1515 case TGSI_SEMANTIC_VERTEXID:
1516 res = bld->system_values.vertex_id;
1517 atype = TGSI_TYPE_UNSIGNED;
1518 break;
1519
1520 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1521 res = bld->system_values.vertex_id_nobase;
1522 atype = TGSI_TYPE_UNSIGNED;
1523 break;
1524
1525 case TGSI_SEMANTIC_BASEVERTEX:
1526 res = bld->system_values.basevertex;
1527 atype = TGSI_TYPE_UNSIGNED;
1528 break;
1529
1530 case TGSI_SEMANTIC_PRIMID:
1531 res = bld->system_values.prim_id;
1532 atype = TGSI_TYPE_UNSIGNED;
1533 break;
1534
1535 case TGSI_SEMANTIC_INVOCATIONID:
1536 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1537 atype = TGSI_TYPE_UNSIGNED;
1538 break;
1539
1540 default:
1541 assert(!"unexpected semantic in emit_fetch_system_value");
1542 res = bld_base->base.zero;
1543 atype = TGSI_TYPE_FLOAT;
1544 break;
1545 }
1546
1547 if (atype != stype) {
1548 if (stype == TGSI_TYPE_FLOAT) {
1549 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1550 } else if (stype == TGSI_TYPE_UNSIGNED) {
1551 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1552 } else if (stype == TGSI_TYPE_SIGNED) {
1553 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1554 }
1555 }
1556
1557 return res;
1558 }
1559
1560 /**
1561 * Register fetch with derivatives.
1562 */
1563 static void
1564 emit_fetch_deriv(
1565 struct lp_build_tgsi_soa_context *bld,
1566 LLVMValueRef src,
1567 LLVMValueRef *res,
1568 LLVMValueRef *ddx,
1569 LLVMValueRef *ddy)
1570 {
1571 if (res)
1572 *res = src;
1573
1574 /* TODO: use interpolation coeffs for inputs */
1575
1576 if (ddx)
1577 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1578
1579 if (ddy)
1580 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1581 }
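/*
 * Background (sketch): lp_build_ddx/lp_build_ddy operate on the 2x2 pixel
 * quads packed into the SoA vectors. For a quad
 *
 *    a b
 *    c d
 *
 * ddx is roughly (b - a) replicated across each row, and ddy roughly
 * (c - a) replicated down each column.
 */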
1582
1583
1584 /**
1585 * Predicate.
1586 */
1587 static void
1588 emit_fetch_predicate(
1589 struct lp_build_tgsi_soa_context *bld,
1590 const struct tgsi_full_instruction *inst,
1591 LLVMValueRef *pred)
1592 {
1593 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1594 unsigned index;
1595 unsigned char swizzles[4];
1596 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
1597 LLVMValueRef value;
1598 unsigned chan;
1599
1600 if (!inst->Instruction.Predicate) {
1601 TGSI_FOR_EACH_CHANNEL( chan ) {
1602 pred[chan] = NULL;
1603 }
1604 return;
1605 }
1606
1607 swizzles[0] = inst->Predicate.SwizzleX;
1608 swizzles[1] = inst->Predicate.SwizzleY;
1609 swizzles[2] = inst->Predicate.SwizzleZ;
1610 swizzles[3] = inst->Predicate.SwizzleW;
1611
1612 index = inst->Predicate.Index;
1613 assert(index < LP_MAX_TGSI_PREDS);
1614
1615 TGSI_FOR_EACH_CHANNEL( chan ) {
1616 unsigned swizzle = swizzles[chan];
1617
1618 /*
1619 * Only fetch the predicate register channels that are actually listed
1620 * in the swizzles
1621 */
1622 if (!unswizzled[swizzle]) {
1623 value = LLVMBuildLoad(builder,
1624 bld->preds[index][swizzle], "");
1625
1626 /*
1627 * Convert the value to an integer mask.
1628 *
1629 * TODO: Short-circuit this comparison -- a D3D setp_xx instruction
1630 * is needlessly causing two comparisons due to storing the intermediate
1631 * result as float vector instead of an integer mask vector.
1632 */
1633 value = lp_build_compare(bld->bld_base.base.gallivm,
1634 bld->bld_base.base.type,
1635 PIPE_FUNC_NOTEQUAL,
1636 value,
1637 bld->bld_base.base.zero);
1638 if (inst->Predicate.Negate) {
1639 value = LLVMBuildNot(builder, value, "");
1640 }
1641
1642 unswizzled[swizzle] = value;
1643 } else {
1644 value = unswizzled[swizzle];
1645 }
1646
1647 pred[chan] = value;
1648 }
1649 }
1650
1651
1652 /**
1653 * Register store.
1654 */
1655 static void
1656 emit_store_chan(
1657 struct lp_build_tgsi_context *bld_base,
1658 const struct tgsi_full_instruction *inst,
1659 unsigned index,
1660 unsigned chan_index,
1661 LLVMValueRef pred,
1662 LLVMValueRef value)
1663 {
1664 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1665 struct gallivm_state *gallivm = bld_base->base.gallivm;
1666 LLVMBuilderRef builder = gallivm->builder;
1667 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1668 struct lp_build_context *float_bld = &bld_base->base;
1669 struct lp_build_context *int_bld = &bld_base->int_bld;
1670 LLVMValueRef indirect_index = NULL;
1671 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1672
1673 /*
1674 * Apply saturation.
1675 *
1676 * It is always assumed to be float.
1677 */
1678 if (inst->Instruction.Saturate) {
1679 assert(dtype == TGSI_TYPE_FLOAT ||
1680 dtype == TGSI_TYPE_UNTYPED);
1681 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1682 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1683 }
1684
1685 if (reg->Register.Indirect) {
1686 indirect_index = get_indirect_index(bld,
1687 reg->Register.File,
1688 reg->Register.Index,
1689 &reg->Indirect);
1690 } else {
1691 assert(reg->Register.Index <=
1692 bld_base->info->file_max[reg->Register.File]);
1693 }
1694
1695 if (DEBUG_EXECUTION) {
1696 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1697 }
1698
1699 switch( reg->Register.File ) {
1700 case TGSI_FILE_OUTPUT:
1701 /* Outputs are always stored as floats */
1702 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1703
1704 if (reg->Register.Indirect) {
1705 LLVMValueRef index_vec; /* indexes into the output registers */
1706 LLVMValueRef outputs_array;
1707 LLVMTypeRef fptr_type;
1708
1709 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1710 indirect_index,
1711 chan_index,
1712 TRUE);
1713
1714 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1715 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1716
1717 /* Scatter store values into output registers */
1718 emit_mask_scatter(bld, outputs_array, index_vec, value,
1719 &bld->exec_mask, pred);
1720 }
1721 else {
1722 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1723 chan_index);
1724 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
1725 }
1726 break;
1727
1728 case TGSI_FILE_TEMPORARY:
1729 /* Temporaries are always stored as floats */
1730 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1731
1732 if (reg->Register.Indirect) {
1733 LLVMValueRef index_vec; /* indexes into the temp registers */
1734 LLVMValueRef temps_array;
1735 LLVMTypeRef fptr_type;
1736
1737 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1738 indirect_index,
1739 chan_index,
1740 TRUE);
1741
1742 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1743 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1744
1745 /* Scatter store values into temp registers */
1746 emit_mask_scatter(bld, temps_array, index_vec, value,
1747 &bld->exec_mask, pred);
1748 }
1749 else {
1750 LLVMValueRef temp_ptr;
1751 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1752 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
1753 }
1754 break;
1755
1756 case TGSI_FILE_ADDRESS:
1757 assert(dtype == TGSI_TYPE_SIGNED);
1758 assert(LLVMTypeOf(value) == int_bld->vec_type);
1759 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1760 lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
1761 bld->addr[reg->Register.Index][chan_index]);
1762 break;
1763
1764 case TGSI_FILE_PREDICATE:
1765 assert(LLVMTypeOf(value) == float_bld->vec_type);
1766 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1767 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
1768 bld->preds[reg->Register.Index][chan_index]);
1769 break;
1770
1771 default:
1772 assert( 0 );
1773 }
1774
1775 (void)dtype;
1776 }
1777
1778 /*
1779 * Called at the beginning of the translation of each TGSI instruction, to
1780 * emit some debug code.
1781 */
1782 static void
1783 emit_debug(
1784 struct lp_build_tgsi_context * bld_base,
1785 const struct tgsi_full_instruction * inst,
1786 const struct tgsi_opcode_info * info)
1787
1788 {
1789 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1790
1791 if (DEBUG_EXECUTION) {
1792 /*
1793 * Dump the TGSI instruction.
1794 */
1795
1796 struct gallivm_state *gallivm = bld_base->base.gallivm;
1797 char buf[512];
1798 buf[0] = '$';
1799 buf[1] = ' ';
1800 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1801 lp_build_printf(gallivm, buf);
1802
1803 /* Dump the execution mask. */
1805 if (bld->exec_mask.has_mask) {
1806 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1807 }
1808 }
1809 }
1810
1811 static void
1812 emit_store(
1813 struct lp_build_tgsi_context * bld_base,
1814 const struct tgsi_full_instruction * inst,
1815 const struct tgsi_opcode_info * info,
1816 LLVMValueRef dst[4])
1817
1818 {
1819 unsigned chan_index;
1820 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1821
1822 if(info->num_dst) {
1823 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1824
1825 emit_fetch_predicate( bld, inst, pred );
1826
1827 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1828 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1829 }
1830 }
1831 }
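
/*
 * Example (annotation): for an instruction with a partial writemask
 * such as
 *
 *    MOV TEMP[0].xz, IMM[0]
 *
 * TGSI_FOR_EACH_DST0_ENABLED_CHANNEL visits only chan_index 0 and 2,
 * so emit_store_chan() runs twice and the y/w channels keep their
 * previous contents.
 */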
1832
1833 static unsigned
1834 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1835 {
1836 switch (tgsi_target) {
1837 case TGSI_TEXTURE_BUFFER:
1838 return PIPE_BUFFER;
1839 case TGSI_TEXTURE_1D:
1840 case TGSI_TEXTURE_SHADOW1D:
1841 return PIPE_TEXTURE_1D;
1842 case TGSI_TEXTURE_2D:
1843 case TGSI_TEXTURE_SHADOW2D:
1844 case TGSI_TEXTURE_2D_MSAA:
1845 return PIPE_TEXTURE_2D;
1846 case TGSI_TEXTURE_3D:
1847 return PIPE_TEXTURE_3D;
1848 case TGSI_TEXTURE_CUBE:
1849 case TGSI_TEXTURE_SHADOWCUBE:
1850 return PIPE_TEXTURE_CUBE;
1851 case TGSI_TEXTURE_RECT:
1852 case TGSI_TEXTURE_SHADOWRECT:
1853 return PIPE_TEXTURE_RECT;
1854 case TGSI_TEXTURE_1D_ARRAY:
1855 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1856 return PIPE_TEXTURE_1D_ARRAY;
1857 case TGSI_TEXTURE_2D_ARRAY:
1858 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1859 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1860 return PIPE_TEXTURE_2D_ARRAY;
1861 case TGSI_TEXTURE_CUBE_ARRAY:
1862 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1863 return PIPE_TEXTURE_CUBE_ARRAY;
1864 default:
1865 assert(0);
1866 return PIPE_BUFFER;
1867 }
1868 }
1869
1870
1871 static enum lp_sampler_lod_property
1872 lp_build_lod_property(
1873 struct lp_build_tgsi_context *bld_base,
1874 const struct tgsi_full_instruction *inst,
1875 unsigned src_op)
1876 {
1877 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
1878 enum lp_sampler_lod_property lod_property;
1879
1880 /*
1881 * Not much we can do here. We could try catching inputs declared
1882 * with constant interpolation, but it's probably not worth it: for
1883 * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
1884 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO, just
1885 * like the constant/immediate recognition below.
1886 * It would be more valuable to recognize temps holding broadcast
1887 * scalars, but there is no way we can do that.
1888 * Asking llvm (via LLVMIsConstant, even though that isn't exactly
1889 * what we'd need) was unsuccessful; even something as simple as
1890 * IMM[0] UINT32 (0,-1,0,0)
1891 * MOV TEMP[0] IMM[0].yyyy
1892 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
1893 * doesn't work.
1894 * This means there's ZERO chance this will ever catch a scalar lod
1895 * with traditional tex opcodes or texel fetches, since the lod
1896 * comes from the same reg as the coords (except maybe some test
1897 * shaders using constant coords).
1898 * There's at least hope for sample opcodes as well as size queries.
1899 */
1900 if (reg->Register.File == TGSI_FILE_CONSTANT ||
1901 reg->Register.File == TGSI_FILE_IMMEDIATE) {
1902 lod_property = LP_SAMPLER_LOD_SCALAR;
1903 }
1904 else if (bld_base->info->processor == TGSI_PROCESSOR_FRAGMENT) {
1905 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
1906 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1907 }
1908 else {
1909 lod_property = LP_SAMPLER_LOD_PER_QUAD;
1910 }
1911 }
1912 else {
1913 /* never use scalar (per-quad) lod; the results are just too wrong. */
1914 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1915 }
1916 return lod_property;
1917 }
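
/*
 * Example classification (annotation, assuming the rules above):
 *
 *    TXL TEMP[1], TEMP[0], SAMP[0], 2D       (lod in TEMP[0].w)
 *      -> temp reg: fragment shaders get LP_SAMPLER_LOD_PER_QUAD
 *         (LP_SAMPLER_LOD_PER_ELEMENT with GALLIVM_DEBUG_NO_QUAD_LOD),
 *         all other stages get LP_SAMPLER_LOD_PER_ELEMENT.
 *
 *    SVIEWINFO TEMP[1], IMM[0].xxxx, SVIEWINFO[0]
 *      -> the lod reg is an immediate: LP_SAMPLER_LOD_SCALAR.
 */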
1918
1919
1920 /**
1921 * High-level instruction translators.
1922 */
1923
1924 static void
1925 emit_tex( struct lp_build_tgsi_soa_context *bld,
1926 const struct tgsi_full_instruction *inst,
1927 enum lp_build_tex_modifier modifier,
1928 LLVMValueRef *texel,
1929 unsigned sampler_reg,
1930 enum lp_sampler_op_type sampler_op)
1931 {
1932 unsigned unit = inst->Src[sampler_reg].Register.Index;
1933 LLVMValueRef oow = NULL;
1934 LLVMValueRef lod = NULL;
1935 LLVMValueRef coords[5];
1936 LLVMValueRef offsets[3] = { NULL };
1937 struct lp_derivatives derivs;
1938 struct lp_sampler_params params;
1939 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
1940 unsigned num_derivs, num_offsets, i;
1941 unsigned shadow_coord = 0;
1942 unsigned layer_coord = 0;
1943 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
1944
1945 memset(&params, 0, sizeof(params));
1946
1947 if (!bld->sampler) {
1948 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1949 for (i = 0; i < 4; i++) {
1950 texel[i] = bld->bld_base.base.undef;
1951 }
1952 return;
1953 }
1954
1955 switch (inst->Texture.Texture) {
1956 case TGSI_TEXTURE_1D_ARRAY:
1957 layer_coord = 1;
1958 /* fallthrough */
1959 case TGSI_TEXTURE_1D:
1960 num_offsets = 1;
1961 num_derivs = 1;
1962 break;
1963 case TGSI_TEXTURE_2D_ARRAY:
1964 layer_coord = 2;
1965 /* fallthrough */
1966 case TGSI_TEXTURE_2D:
1967 case TGSI_TEXTURE_RECT:
1968 num_offsets = 2;
1969 num_derivs = 2;
1970 break;
1971 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1972 layer_coord = 1;
1973 /* fallthrough */
1974 case TGSI_TEXTURE_SHADOW1D:
1975 shadow_coord = 2;
1976 num_offsets = 1;
1977 num_derivs = 1;
1978 break;
1979 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1980 layer_coord = 2;
1981 shadow_coord = 3;
1982 num_offsets = 2;
1983 num_derivs = 2;
1984 break;
1985 case TGSI_TEXTURE_SHADOW2D:
1986 case TGSI_TEXTURE_SHADOWRECT:
1987 shadow_coord = 2;
1988 num_offsets = 2;
1989 num_derivs = 2;
1990 break;
1991 case TGSI_TEXTURE_CUBE:
1992 num_offsets = 2;
1993 num_derivs = 3;
1994 break;
1995 case TGSI_TEXTURE_3D:
1996 num_offsets = 3;
1997 num_derivs = 3;
1998 break;
1999 case TGSI_TEXTURE_SHADOWCUBE:
2000 shadow_coord = 3;
2001 num_offsets = 2;
2002 num_derivs = 3;
2003 break;
2004 case TGSI_TEXTURE_CUBE_ARRAY:
2005 num_offsets = 2;
2006 num_derivs = 3;
2007 layer_coord = 3;
2008 break;
2009 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2010 num_offsets = 2;
2011 num_derivs = 3;
2012 layer_coord = 3;
2013 shadow_coord = 4; /* the shadow coord comes from a separate (second) reg */
2014 break;
2015 case TGSI_TEXTURE_2D_MSAA:
2016 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2017 default:
2018 assert(0);
2019 return;
2020 }
2021
2022 /* Note lod and especially projected are illegal in a LOT of cases */
2023 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2024 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2025 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2026 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2027 /* note that shadow cube array with bias/explicit lod does not exist */
2028 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2029 }
2030 else {
2031 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2032 }
2033 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2034 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2035 }
2036 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2037 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2038 }
2039 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2040 }
2041
2042 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2043 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2044 oow = lp_build_rcp(&bld->bld_base.base, oow);
2045 }
2046
2047 for (i = 0; i < num_derivs; i++) {
2048 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2049 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2050 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2051 }
2052 for (i = num_derivs; i < 5; i++) {
2053 coords[i] = bld->bld_base.base.undef;
2054 }
2055
2056 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2057 if (layer_coord) {
2058 if (layer_coord == 3) {
2059 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2060 }
2061 else {
2062 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2063 }
2064 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2065 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2066 }
2067 /* The shadow coord always occupies the 5th slot. */
2068 if (shadow_coord) {
2069 sample_key |= LP_SAMPLER_SHADOW;
2070 if (shadow_coord == 4) {
2071 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2072 }
2073 else {
2074 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2075 }
2076 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2077 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2078 }
2079
2080 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2081 unsigned dim;
2082 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2083 for (dim = 0; dim < num_derivs; ++dim) {
2084 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2085 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2086 }
2087 params.derivs = &derivs;
2088 /*
2089 * We could also check whether all src regs are constant, but I doubt
2090 * such cases exist in practice.
2091 */
2092 if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
2093 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2094 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2095 }
2096 else {
2097 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2098 }
2099 }
2100 else {
2101 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2102 }
2103 }
2104 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2105
2106 /* we don't handle the 4 offset version of tg4 */
2107 if (inst->Texture.NumOffsets == 1) {
2108 unsigned dim;
2109 sample_key |= LP_SAMPLER_OFFSETS;
2110 for (dim = 0; dim < num_offsets; dim++) {
2111 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2112 }
2113 }
2114
2115 params.type = bld->bld_base.base.type;
2116 params.sample_key = sample_key;
2117 params.texture_index = unit;
2118 params.sampler_index = unit;
2119 params.context_ptr = bld->context_ptr;
2120 params.coords = coords;
2121 params.offsets = offsets;
2122 params.lod = lod;
2123 params.texel = texel;
2124
2125 bld->sampler->emit_tex_sample(bld->sampler,
2126 bld->bld_base.base.gallivm,
2127 &params);
2128 }
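
/*
 * Worked example (annotation): a TXB on a SHADOW2D target ends up with
 *
 *    sample_key = (LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT) |
 *                 LP_SAMPLER_SHADOW |
 *                 (LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT) |
 *                 (lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT);
 *
 * with coords[0..1] taken from src0.xy, the comparison value in
 * coords[4] (from src0.z) and the bias in lod (from src0.w).
 */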
2129
2130 static void
2131 emit_sample(struct lp_build_tgsi_soa_context *bld,
2132 const struct tgsi_full_instruction *inst,
2133 enum lp_build_tex_modifier modifier,
2134 boolean compare,
2135 LLVMValueRef *texel)
2136 {
2137 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2138 unsigned texture_unit, sampler_unit;
2139 LLVMValueRef lod = NULL;
2140 LLVMValueRef coords[5];
2141 LLVMValueRef offsets[3] = { NULL };
2142 struct lp_derivatives derivs;
2143 struct lp_sampler_params params;
2144 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2145
2146 unsigned num_offsets, num_derivs, i;
2147 unsigned layer_coord = 0;
2148 unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT;
2149
2150 memset(&params, 0, sizeof(params));
2151
2152 if (!bld->sampler) {
2153 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2154 for (i = 0; i < 4; i++) {
2155 texel[i] = bld->bld_base.base.undef;
2156 }
2157 return;
2158 }
2159
2160 /*
2161 * Unlike old-style tex opcodes, the texture/sampler indices
2162 * always come from src1 and src2, respectively.
2163 */
2164 texture_unit = inst->Src[1].Register.Index;
2165 sampler_unit = inst->Src[2].Register.Index;
2166
2167 /*
2168 * Note inst->Texture.Texture will contain the number of offsets;
2169 * however, the target information is NOT there and comes from the
2170 * declared sampler views instead.
2171 */
2172 switch (bld->sv[texture_unit].Resource) {
2173 case TGSI_TEXTURE_1D:
2174 num_offsets = 1;
2175 num_derivs = 1;
2176 break;
2177 case TGSI_TEXTURE_1D_ARRAY:
2178 layer_coord = 1;
2179 num_offsets = 1;
2180 num_derivs = 1;
2181 break;
2182 case TGSI_TEXTURE_2D:
2183 case TGSI_TEXTURE_RECT:
2184 num_offsets = 2;
2185 num_derivs = 2;
2186 break;
2187 case TGSI_TEXTURE_2D_ARRAY:
2188 layer_coord = 2;
2189 num_offsets = 2;
2190 num_derivs = 2;
2191 break;
2192 case TGSI_TEXTURE_CUBE:
2193 num_offsets = 2;
2194 num_derivs = 3;
2195 break;
2196 case TGSI_TEXTURE_3D:
2197 num_offsets = 3;
2198 num_derivs = 3;
2199 break;
2200 case TGSI_TEXTURE_CUBE_ARRAY:
2201 layer_coord = 3;
2202 num_offsets = 2;
2203 num_derivs = 3;
2204 break;
2205 default:
2206 assert(0);
2207 return;
2208 }
2209
2210 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2211 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2212 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2213 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2214 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2215 }
2216 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2217 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2218 }
2219 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2220 }
2221 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2222 /* XXX might be better to explicitly pass the level zero information */
2223 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2224 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2225 }
2226
2227 for (i = 0; i < num_derivs; i++) {
2228 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2229 }
2230 for (i = num_derivs; i < 5; i++) {
2231 coords[i] = bld->bld_base.base.undef;
2232 }
2233
2234 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2235 if (layer_coord) {
2236 if (layer_coord == 3)
2237 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2238 else
2239 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2240 }
2241 /* The shadow coord always occupies the 5th slot. */
2242 if (compare) {
2243 sample_key |= LP_SAMPLER_SHADOW;
2244 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2245 }
2246
2247 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2248 unsigned dim;
2249 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2250 for (dim = 0; dim < num_derivs; ++dim) {
2251 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2252 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2253 }
2254 params.derivs = &derivs;
2255 /*
2256 * We could also check whether all src regs are constant, but I doubt
2257 * such cases exist in practice.
2258 */
2259 if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
2260 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2261 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2262 }
2263 else {
2264 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2265 }
2266 }
2267 else {
2268 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2269 }
2270 }
2271
2272 /* some advanced gather instructions (txgo) would require 4 offsets */
2273 if (inst->Texture.NumOffsets == 1) {
2274 unsigned dim;
2275 sample_key |= LP_SAMPLER_OFFSETS;
2276 for (dim = 0; dim < num_offsets; dim++) {
2277 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2278 }
2279 }
2280 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2281
2282 params.type = bld->bld_base.base.type;
2283 params.sample_key = sample_key;
2284 params.texture_index = texture_unit;
2285 params.sampler_index = sampler_unit;
2286 params.context_ptr = bld->context_ptr;
2287 params.coords = coords;
2288 params.offsets = offsets;
2289 params.lod = lod;
2290 params.texel = texel;
2291
2292 bld->sampler->emit_tex_sample(bld->sampler,
2293 bld->bld_base.base.gallivm,
2294 &params);
2295
2296 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
2297 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
2298 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
2299 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA) {
2300 unsigned char swizzles[4];
2301 swizzles[0] = inst->Src[1].Register.SwizzleX;
2302 swizzles[1] = inst->Src[1].Register.SwizzleY;
2303 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2304 swizzles[3] = inst->Src[1].Register.SwizzleW;
2305
2306 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2307 }
2308 }
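
/*
 * Example (annotation): SAMPLE_C_LZ reaches this function with
 * compare = TRUE and the LOD_ZERO modifier, so sample_key gets
 * LP_SAMPLER_SHADOW plus an explicit lod which is a constant 0.0
 * vector; coords[4] holds the reference value from src3.x, while
 * texture_unit and sampler_unit come from src1 and src2.
 */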
2309
2310 static void
2311 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2312 const struct tgsi_full_instruction *inst,
2313 LLVMValueRef *texel,
2314 boolean is_samplei)
2315 {
2316 unsigned unit, target;
2317 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2318 LLVMValueRef explicit_lod = NULL;
2319 LLVMValueRef coords[5];
2320 LLVMValueRef offsets[3] = { NULL };
2321 struct lp_sampler_params params;
2322 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2323 unsigned dims, i;
2324 unsigned layer_coord = 0;
2325 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2326
2327 memset(&params, 0, sizeof(params));
2328
2329 if (!bld->sampler) {
2330 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2331 for (i = 0; i < 4; i++) {
2332 texel[i] = coord_undef;
2333 }
2334 return;
2335 }
2336
2337 unit = inst->Src[1].Register.Index;
2338
2339 if (is_samplei) {
2340 target = bld->sv[unit].Resource;
2341 }
2342 else {
2343 target = inst->Texture.Texture;
2344 }
2345
2346 switch (target) {
2347 case TGSI_TEXTURE_1D:
2348 case TGSI_TEXTURE_BUFFER:
2349 dims = 1;
2350 break;
2351 case TGSI_TEXTURE_1D_ARRAY:
2352 layer_coord = 1;
2353 dims = 1;
2354 break;
2355 case TGSI_TEXTURE_2D:
2356 case TGSI_TEXTURE_RECT:
2357 case TGSI_TEXTURE_2D_MSAA:
2358 dims = 2;
2359 break;
2360 case TGSI_TEXTURE_2D_ARRAY:
2361 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2362 layer_coord = 2;
2363 dims = 2;
2364 break;
2365 case TGSI_TEXTURE_3D:
2366 dims = 3;
2367 break;
2368 default:
2369 assert(0);
2370 return;
2371 }
2372
2373 /* We always have an explicit lod, except for buffers and msaa targets(?). */
2374 if (target != TGSI_TEXTURE_BUFFER &&
2375 target != TGSI_TEXTURE_2D_MSAA &&
2376 target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2377 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2378 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2379 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2380 }
2381 /* XXX: for real msaa support, the w component would be the sample index. */
2382
2383 for (i = 0; i < dims; i++) {
2384 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2385 }
2386 /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2387 for (i = dims; i < 5; i++) {
2388 coords[i] = coord_undef;
2389 }
2390 if (layer_coord)
2391 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2392
2393 if (inst->Texture.NumOffsets == 1) {
2394 unsigned dim;
2395 sample_key |= LP_SAMPLER_OFFSETS;
2396 for (dim = 0; dim < dims; dim++) {
2397 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2398 }
2399 }
2400 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2401
2402 params.type = bld->bld_base.base.type;
2403 params.sample_key = sample_key;
2404 params.texture_index = unit;
2405 params.sampler_index = unit;
2406 params.context_ptr = bld->context_ptr;
2407 params.coords = coords;
2408 params.offsets = offsets;
2409 params.derivs = NULL;
2410 params.lod = explicit_lod;
2411 params.texel = texel;
2412
2413 bld->sampler->emit_tex_sample(bld->sampler,
2414 bld->bld_base.base.gallivm,
2415 &params);
2416
2417 if (is_samplei &&
2418 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
2419 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
2420 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
2421 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA)) {
2422 unsigned char swizzles[4];
2423 swizzles[0] = inst->Src[1].Register.SwizzleX;
2424 swizzles[1] = inst->Src[1].Register.SwizzleY;
2425 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2426 swizzles[3] = inst->Src[1].Register.SwizzleW;
2427
2428 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2429 }
2430 }
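
/*
 * Example (annotation): a texel fetch such as
 *
 *    TXF TEMP[1], TEMP[0], SAMP[0], 2D
 *
 * takes integer texel coords from TEMP[0].xy and an explicit integer
 * mip level from TEMP[0].w. No filtering or sampler state is involved,
 * which is why texture_index and sampler_index are simply set to the
 * same unit.
 */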
2431
2432 static void
2433 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2434 const struct tgsi_full_instruction *inst,
2435 LLVMValueRef *sizes_out,
2436 boolean is_sviewinfo)
2437 {
2438 LLVMValueRef explicit_lod;
2439 enum lp_sampler_lod_property lod_property;
2440 unsigned has_lod;
2441 unsigned i;
2442 unsigned unit = inst->Src[1].Register.Index;
2443 unsigned target, pipe_target;
2444
2445 if (is_sviewinfo) {
2446 target = bld->sv[unit].Resource;
2447 }
2448 else {
2449 target = inst->Texture.Texture;
2450 }
2451 switch (target) {
2452 case TGSI_TEXTURE_BUFFER:
2453 case TGSI_TEXTURE_RECT:
2454 case TGSI_TEXTURE_SHADOWRECT:
2455 has_lod = 0;
2456 break;
2457 default:
2458 has_lod = 1;
2459 break;
2460 }
2461
2462 if (!bld->sampler) {
2463 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2464 for (i = 0; i < 4; i++)
2465 sizes_out[i] = bld->bld_base.int_bld.undef;
2466 return;
2467 }
2468
2469 if (has_lod) {
2470 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2471 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2472 }
2473 else {
2474 explicit_lod = NULL;
2475 lod_property = LP_SAMPLER_LOD_SCALAR;
2476 }
2477
2478
2479 pipe_target = tgsi_to_pipe_tex_target(target);
2480
2481 bld->sampler->emit_size_query(bld->sampler,
2482 bld->bld_base.base.gallivm,
2483 bld->bld_base.int_bld.type,
2484 unit, pipe_target,
2485 bld->context_ptr,
2486 TRUE,
2487 lod_property,
2488 explicit_lod,
2489 sizes_out);
2490 }
2491
2492 static boolean
2493 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2494 int pc)
2495 {
2496 int i;
2497
2498 for (i = 0; i < 5; i++) {
2499 unsigned opcode;
2500
2501 if (pc + i >= bld->bld_base.info->num_instructions)
2502 return TRUE;
2503
2504 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2505
2506 if (opcode == TGSI_OPCODE_END)
2507 return TRUE;
2508
2509 if (opcode == TGSI_OPCODE_TEX ||
2510 opcode == TGSI_OPCODE_TXP ||
2511 opcode == TGSI_OPCODE_TXD ||
2512 opcode == TGSI_OPCODE_TXB ||
2513 opcode == TGSI_OPCODE_TXL ||
2514 opcode == TGSI_OPCODE_TXF ||
2515 opcode == TGSI_OPCODE_TXQ ||
2516 opcode == TGSI_OPCODE_TEX2 ||
2517 opcode == TGSI_OPCODE_TXB2 ||
2518 opcode == TGSI_OPCODE_TXL2 ||
2519 opcode == TGSI_OPCODE_SAMPLE ||
2520 opcode == TGSI_OPCODE_SAMPLE_B ||
2521 opcode == TGSI_OPCODE_SAMPLE_C ||
2522 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2523 opcode == TGSI_OPCODE_SAMPLE_D ||
2524 opcode == TGSI_OPCODE_SAMPLE_I ||
2525 opcode == TGSI_OPCODE_SAMPLE_L ||
2526 opcode == TGSI_OPCODE_SVIEWINFO ||
2527 opcode == TGSI_OPCODE_CAL ||
2528 opcode == TGSI_OPCODE_CALLNZ ||
2529 opcode == TGSI_OPCODE_IF ||
2530 opcode == TGSI_OPCODE_UIF ||
2531 opcode == TGSI_OPCODE_BGNLOOP ||
2532 opcode == TGSI_OPCODE_SWITCH)
2533 return FALSE;
2534 }
2535
2536 return TRUE;
2537 }
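
/*
 * Rationale (annotation): after a kill we normally emit a mask check
 * so that fully dead pixel quads can branch over the remaining code.
 * When only a handful of cheap instructions remain before
 * TGSI_OPCODE_END, the conditional branch likely costs more than just
 * executing them masked, hence the five-instruction lookahead above.
 */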
2538
2539
2540
2541 /**
2542 * Kill fragment if any of the src register values are negative.
2543 */
2544 static void
2545 emit_kill_if(
2546 struct lp_build_tgsi_soa_context *bld,
2547 const struct tgsi_full_instruction *inst,
2548 int pc)
2549 {
2550 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2551 const struct tgsi_full_src_register *reg = &inst->Src[0];
2552 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2553 LLVMValueRef mask;
2554 unsigned chan_index;
2555
2556 memset(&terms, 0, sizeof terms);
2557
2558 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2559 unsigned swizzle;
2560
2561 /* Unswizzle channel */
2562 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2563
2564 /* Check whether this component has already been tested. */
2565 assert(swizzle < TGSI_NUM_CHANNELS);
2566 if( !terms[swizzle] )
2567 /* TODO: change the comparison operator instead of setting the sign */
2568 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2569 }
2570
2571 mask = NULL;
2572 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2573 if(terms[chan_index]) {
2574 LLVMValueRef chan_mask;
2575
2576 /*
2577 * If term < 0 then mask = 0 else mask = ~0.
2578 */
2579 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2580
2581 if(mask)
2582 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2583 else
2584 mask = chan_mask;
2585 }
2586 }
2587
2588 if (bld->exec_mask.has_mask) {
2589 LLVMValueRef invmask;
2590 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2591 mask = LLVMBuildOr(builder, mask, invmask, "");
2592 }
2593
2594 lp_build_mask_update(bld->mask, mask);
2595 if (!near_end_of_shader(bld, pc))
2596 lp_build_mask_check(bld->mask);
2597 }
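
/*
 * Example (annotation): KILL_IF TEMP[0].xxxy fetches TEMP[0].x only
 * once; the terms[] deduplication above keys on the unswizzled
 * component, so x contributes a single comparison (instead of three)
 * and y one more.
 */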
2598
2599
2600 /**
2601 * Unconditional fragment kill.
2602 * The only predication is the execution mask which will apply if
2603 * we're inside a loop or conditional.
2604 */
2605 static void
2606 emit_kill(struct lp_build_tgsi_soa_context *bld,
2607 int pc)
2608 {
2609 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2610 LLVMValueRef mask;
2611
2612 /* For those channels which are "alive", disable fragment shader
2613 * execution.
2614 */
2615 if (bld->exec_mask.has_mask) {
2616 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2617 }
2618 else {
2619 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2620 mask = zero;
2621 }
2622
2623 lp_build_mask_update(bld->mask, mask);
2624
2625 if (!near_end_of_shader(bld, pc))
2626 lp_build_mask_check(bld->mask);
2627 }
2628
2629
2630 /**
2631 * Emit code which will dump the values of all registers in the given
2632 * file to stdout.
2633 */
2634 static void
2635 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2636 unsigned file)
2637 {
2638 const struct tgsi_shader_info *info = bld->bld_base.info;
2639 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2640 LLVMBuilderRef builder = gallivm->builder;
2641 LLVMValueRef reg_ptr;
2642 int index;
2643 int max_index = info->file_max[file];
2644
2645 /*
2646 * Some register files, particularly constants, can be very large,
2647 * and dumping everything could make this unusably slow.
2648 */
2649 max_index = MIN2(max_index, 32);
2650
2651 for (index = 0; index <= max_index; index++) {
2652 LLVMValueRef res;
2653 unsigned mask;
2654 int chan;
2655
2656 if (index < 8 * sizeof(unsigned) &&
2657 (info->file_mask[file] & (1 << index)) == 0) {
2658 /* This was not declared. */
2659 continue;
2660 }
2661
2662 if (file == TGSI_FILE_INPUT) {
2663 mask = info->input_usage_mask[index];
2664 } else {
2665 mask = TGSI_WRITEMASK_XYZW;
2666 }
2667
2668 for (chan = 0; chan < 4; chan++) {
2669 if ((mask & (1 << chan)) == 0) {
2670 /* This channel is not used. */
2671 continue;
2672 }
2673
2674 if (file == TGSI_FILE_CONSTANT) {
2675 struct tgsi_full_src_register reg;
2676 memset(&reg, 0, sizeof reg);
2677 reg.Register.File = file;
2678 reg.Register.Index = index;
2679 reg.Register.SwizzleX = 0;
2680 reg.Register.SwizzleY = 1;
2681 reg.Register.SwizzleZ = 2;
2682 reg.Register.SwizzleW = 3;
2683
2684 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2685 if (!res) {
2686 continue;
2687 }
2688 } else if (file == TGSI_FILE_INPUT) {
2689 res = bld->inputs[index][chan];
2690 if (!res) {
2691 continue;
2692 }
2693 } else if (file == TGSI_FILE_TEMPORARY) {
2694 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2695 assert(reg_ptr);
2696 res = LLVMBuildLoad(builder, reg_ptr, "");
2697 } else if (file == TGSI_FILE_OUTPUT) {
2698 reg_ptr = lp_get_output_ptr(bld, index, chan);
2699 assert(reg_ptr);
2700 res = LLVMBuildLoad(builder, reg_ptr, "");
2701 } else {
2702 assert(0);
2703 continue;
2704 }
2705
2706 emit_dump_reg(gallivm, file, index, chan, res);
2707 }
2708 }
2709 }
2710
2711
2712
2713 void
2714 lp_emit_declaration_soa(
2715 struct lp_build_tgsi_context *bld_base,
2716 const struct tgsi_full_declaration *decl)
2717 {
2718 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2719 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2720 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2721 const unsigned first = decl->Range.First;
2722 const unsigned last = decl->Range.Last;
2723 unsigned idx, i;
2724
2725 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2726
2727 switch (decl->Declaration.File) {
2728 case TGSI_FILE_TEMPORARY:
2729 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2730 assert(last < LP_MAX_INLINED_TEMPS);
2731 for (idx = first; idx <= last; ++idx) {
2732 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2733 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2734 }
2735 }
2736 break;
2737
2738 case TGSI_FILE_OUTPUT:
2739 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2740 for (idx = first; idx <= last; ++idx) {
2741 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2742 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2743 vec_type, "output");
2744 }
2745 }
2746 break;
2747
2748 case TGSI_FILE_ADDRESS:
2749 /* ADDR registers are allocated with an integer LLVM IR type rather
2750 * than a float type, as they are guaranteed to always hold integers.
2751 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2752 * an ADDR register, for that matter).
2753 */
2754 assert(last < LP_MAX_TGSI_ADDRS);
2755 for (idx = first; idx <= last; ++idx) {
2756 assert(idx < LP_MAX_TGSI_ADDRS);
2757 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2758 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2759 }
2760 break;
2761
2762 case TGSI_FILE_PREDICATE:
2763 assert(last < LP_MAX_TGSI_PREDS);
2764 for (idx = first; idx <= last; ++idx) {
2765 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2766 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
2767 "predicate");
2768 }
2769 break;
2770
2771 case TGSI_FILE_SAMPLER_VIEW:
2772 /*
2773 * The target stored here MUST match whatever is actually bound
2774 * in the set sampler views (what about the return type?).
2775 */
2776 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2777 for (idx = first; idx <= last; ++idx) {
2778 bld->sv[idx] = decl->SamplerView;
2779 }
2780 break;
2781
2782 case TGSI_FILE_CONSTANT:
2783 {
2784 /*
2785 * We could trivially fetch the per-buffer pointer when fetching the
2786 * constant, relying on llvm to figure out it's always the same pointer
2787 * anyway. However, doing so results in a huge (more than factor of 10)
2788 * slowdown in llvm compilation times for some (but not all) shaders
2789 * (more specifically, the IR optimization spends way more time in
2790 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2791 */
2792 unsigned idx2D = decl->Dim.Index2D;
2793 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2794 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2795 bld->consts[idx2D] =
2796 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2797 bld->consts_sizes[idx2D] =
2798 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2799 }
2800 break;
2801
2802 default:
2803 /* don't need to declare other vars */
2804 break;
2805 }
2806 }
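
/*
 * Example (annotation): a shader containing
 *
 *    DCL TEMP[0..1]
 *
 * without indirect addressing gets 2 * TGSI_NUM_CHANNELS "temp"
 * allocas, one vector per register channel; with indirect addressing
 * the registers instead live in the single temps_array allocated in
 * emit_prologue().
 */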
2807
2808
2809 void lp_emit_immediate_soa(
2810 struct lp_build_tgsi_context *bld_base,
2811 const struct tgsi_full_immediate *imm)
2812 {
2813 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2814 struct gallivm_state * gallivm = bld_base->base.gallivm;
2815 LLVMValueRef imms[4];
2816 unsigned i;
2817 const uint size = imm->Immediate.NrTokens - 1;
2818 assert(size <= 4);
2819 switch (imm->Immediate.DataType) {
2820 case TGSI_IMM_FLOAT32:
2821 for( i = 0; i < size; ++i )
2822 imms[i] =
2823 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2824
2825 break;
2826 case TGSI_IMM_UINT32:
2827 for( i = 0; i < size; ++i ) {
2828 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2829 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2830 }
2831
2832 break;
2833 case TGSI_IMM_INT32:
2834 for( i = 0; i < size; ++i ) {
2835 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2836 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2837 }
2838
2839 break;
2840 }
2841 for( i = size; i < 4; ++i )
2842 imms[i] = bld_base->base.undef;
2843
2844 if (bld->use_immediates_array) {
2845 unsigned index = bld->num_immediates;
2846 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2847 LLVMBuilderRef builder = gallivm->builder;
2848
2849 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
2850 for (i = 0; i < 4; ++i ) {
2851 LLVMValueRef lindex = lp_build_const_int32(
2852 bld->bld_base.base.gallivm, index * 4 + i);
2853 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2854 bld->imms_array, &lindex, 1, "");
2855 LLVMBuildStore(builder, imms[i], imm_ptr);
2856 }
2857 } else {
2858 /* simply copy the immediate values into the next immediates[] slot */
2861 assert(size <= 4);
2862 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
2863
2864 for(i = 0; i < 4; ++i )
2865 bld->immediates[bld->num_immediates][i] = imms[i];
2866
2867 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
2868 unsigned index = bld->num_immediates;
2869 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2870 LLVMBuilderRef builder = gallivm->builder;
2871 for (i = 0; i < 4; ++i ) {
2872 LLVMValueRef lindex = lp_build_const_int32(
2873 bld->bld_base.base.gallivm, index * 4 + i);
2874 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2875 bld->imms_array, &lindex, 1, "");
2876 LLVMBuildStore(builder,
2877 bld->immediates[index][i],
2878 imm_ptr);
2879 }
2880 }
2881 }
2882
2883 bld->num_immediates++;
2884 }
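
/*
 * Example (annotation): for
 *
 *    IMM[0] FLT32 { 1.0, 0.5, 0.0, 0.0 }
 *
 * each of the four components is broadcast into a full SoA vector
 * (e.g. <8 x float> for a 256-bit build) and kept in
 * bld->immediates[0][0..3]; with indirect addressing the same values
 * are also stored to imms_array at offsets 0..3.
 */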
2885
2886 static void
2887 ddx_emit(
2888 const struct lp_build_tgsi_action * action,
2889 struct lp_build_tgsi_context * bld_base,
2890 struct lp_build_emit_data * emit_data)
2891 {
2892 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2893
2894 emit_fetch_deriv(bld, emit_data->args[0], NULL,
2895 &emit_data->output[emit_data->chan], NULL);
2896 }
2897
2898 static void
2899 ddy_emit(
2900 const struct lp_build_tgsi_action * action,
2901 struct lp_build_tgsi_context * bld_base,
2902 struct lp_build_emit_data * emit_data)
2903 {
2904 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2905
2906 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
2907 &emit_data->output[emit_data->chan]);
2908 }
2909
2910 static void
2911 kill_emit(
2912 const struct lp_build_tgsi_action * action,
2913 struct lp_build_tgsi_context * bld_base,
2914 struct lp_build_emit_data * emit_data)
2915 {
2916 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2917
2918 emit_kill(bld, bld_base->pc - 1);
2919 }
2920
2921 static void
2922 kill_if_emit(
2923 const struct lp_build_tgsi_action * action,
2924 struct lp_build_tgsi_context * bld_base,
2925 struct lp_build_emit_data * emit_data)
2926 {
2927 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2928
2929 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
2930 }
2931
2932 static void
2933 tex_emit(
2934 const struct lp_build_tgsi_action * action,
2935 struct lp_build_tgsi_context * bld_base,
2936 struct lp_build_emit_data * emit_data)
2937 {
2938 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2939
2940 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2941 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2942 }
2943
2944 static void
2945 tex2_emit(
2946 const struct lp_build_tgsi_action * action,
2947 struct lp_build_tgsi_context * bld_base,
2948 struct lp_build_emit_data * emit_data)
2949 {
2950 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2951
2952 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2953 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
2954 }
2955
2956 static void
2957 txb_emit(
2958 const struct lp_build_tgsi_action * action,
2959 struct lp_build_tgsi_context * bld_base,
2960 struct lp_build_emit_data * emit_data)
2961 {
2962 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2963
2964 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2965 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2966 }
2967
2968 static void
2969 txb2_emit(
2970 const struct lp_build_tgsi_action * action,
2971 struct lp_build_tgsi_context * bld_base,
2972 struct lp_build_emit_data * emit_data)
2973 {
2974 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2975
2976 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2977 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
2978 }
2979
2980 static void
2981 txd_emit(
2982 const struct lp_build_tgsi_action * action,
2983 struct lp_build_tgsi_context * bld_base,
2984 struct lp_build_emit_data * emit_data)
2985 {
2986 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2987
2988 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2989 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
2990 }
2991
2992 static void
2993 txl_emit(
2994 const struct lp_build_tgsi_action * action,
2995 struct lp_build_tgsi_context * bld_base,
2996 struct lp_build_emit_data * emit_data)
2997 {
2998 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2999
3000 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3001 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3002 }
3003
3004 static void
3005 txl2_emit(
3006 const struct lp_build_tgsi_action * action,
3007 struct lp_build_tgsi_context * bld_base,
3008 struct lp_build_emit_data * emit_data)
3009 {
3010 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3011
3012 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3013 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3014 }
3015
3016 static void
3017 txp_emit(
3018 const struct lp_build_tgsi_action * action,
3019 struct lp_build_tgsi_context * bld_base,
3020 struct lp_build_emit_data * emit_data)
3021 {
3022 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3023
3024 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3025 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3026 }
3027
3028 static void
3029 tg4_emit(
3030 const struct lp_build_tgsi_action * action,
3031 struct lp_build_tgsi_context * bld_base,
3032 struct lp_build_emit_data * emit_data)
3033 {
3034 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3035
3036 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3037 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3038 }
3039
3040 static void
3041 txq_emit(
3042 const struct lp_build_tgsi_action * action,
3043 struct lp_build_tgsi_context * bld_base,
3044 struct lp_build_emit_data * emit_data)
3045 {
3046 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3047
3048 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3049 }
3050
3051 static void
3052 txf_emit(
3053 const struct lp_build_tgsi_action * action,
3054 struct lp_build_tgsi_context * bld_base,
3055 struct lp_build_emit_data * emit_data)
3056 {
3057 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3058
3059 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3060 }
3061
3062 static void
3063 sample_i_emit(
3064 const struct lp_build_tgsi_action * action,
3065 struct lp_build_tgsi_context * bld_base,
3066 struct lp_build_emit_data * emit_data)
3067 {
3068 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3069
3070 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3071 }
3072
3073 static void
3074 sample_emit(
3075 const struct lp_build_tgsi_action * action,
3076 struct lp_build_tgsi_context * bld_base,
3077 struct lp_build_emit_data * emit_data)
3078 {
3079 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3080
3081 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3082 FALSE, emit_data->output);
3083 }
3084
3085 static void
3086 sample_b_emit(
3087 const struct lp_build_tgsi_action * action,
3088 struct lp_build_tgsi_context * bld_base,
3089 struct lp_build_emit_data * emit_data)
3090 {
3091 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3092
3093 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3094 FALSE, emit_data->output);
3095 }
3096
3097 static void
3098 sample_c_emit(
3099 const struct lp_build_tgsi_action * action,
3100 struct lp_build_tgsi_context * bld_base,
3101 struct lp_build_emit_data * emit_data)
3102 {
3103 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3104
3105 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3106 TRUE, emit_data->output);
3107 }
3108
3109 static void
3110 sample_c_lz_emit(
3111 const struct lp_build_tgsi_action * action,
3112 struct lp_build_tgsi_context * bld_base,
3113 struct lp_build_emit_data * emit_data)
3114 {
3115 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3116
3117 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3118 TRUE, emit_data->output);
3119 }
3120
3121 static void
3122 sample_d_emit(
3123 const struct lp_build_tgsi_action * action,
3124 struct lp_build_tgsi_context * bld_base,
3125 struct lp_build_emit_data * emit_data)
3126 {
3127 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3128
3129 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3130 FALSE, emit_data->output);
3131 }
3132
3133 static void
3134 sample_l_emit(
3135 const struct lp_build_tgsi_action * action,
3136 struct lp_build_tgsi_context * bld_base,
3137 struct lp_build_emit_data * emit_data)
3138 {
3139 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3140
3141 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3142 FALSE, emit_data->output);
3143 }
3144
3145 static void
3146 sviewinfo_emit(
3147 const struct lp_build_tgsi_action * action,
3148 struct lp_build_tgsi_context * bld_base,
3149 struct lp_build_emit_data * emit_data)
3150 {
3151 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3152
3153 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3154 }
3155
3156 static LLVMValueRef
3157 mask_vec(struct lp_build_tgsi_context *bld_base)
3158 {
3159 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3160 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3161 struct lp_exec_mask *exec_mask = &bld->exec_mask;
3162
3163 if (!exec_mask->has_mask) {
3164 return lp_build_mask_value(bld->mask);
3165 }
3166 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
3167 exec_mask->exec_mask, "");
3168 }
3169
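/*
 * Increment the (integer) vector in *ptr by one in every lane where
 * mask is set. This relies on active mask lanes being all ones
 * (~0 == -1), so subtracting the mask adds 1 per active lane.
 */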
3170 static void
3171 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3172 LLVMValueRef ptr,
3173 LLVMValueRef mask)
3174 {
3175 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3176 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3177
3178 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3179
3180 LLVMBuildStore(builder, current_vec, ptr);
3181 }
3182
3183 static void
3184 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3185 LLVMValueRef ptr,
3186 LLVMValueRef mask)
3187 {
3188 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3189 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3190
3191 current_vec = lp_build_select(&bld_base->uint_bld,
3192 mask,
3193 bld_base->uint_bld.zero,
3194 current_vec);
3195
3196 LLVMBuildStore(builder, current_vec, ptr);
3197 }
3198
3199 static LLVMValueRef
3200 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3201 LLVMValueRef current_mask_vec,
3202 LLVMValueRef total_emitted_vertices_vec)
3203 {
3204 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3205 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3206 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3207 total_emitted_vertices_vec,
3208 bld->max_output_vertices_vec);
3209
3210 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3211 }
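
/*
 * Usage note (annotation): emit_vertex() below runs this on the
 * execution mask so that lanes which have already emitted
 * max_output_vertices vertices are masked off and stop counting,
 * mirroring GS behavior of silently dropping excess vertices.
 */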
3212
3213 static void
3214 emit_vertex(
3215 const struct lp_build_tgsi_action * action,
3216 struct lp_build_tgsi_context * bld_base,
3217 struct lp_build_emit_data * emit_data)
3218 {
3219 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3220 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3221
3222 if (bld->gs_iface->emit_vertex) {
3223 LLVMValueRef mask = mask_vec(bld_base);
3224 LLVMValueRef total_emitted_vertices_vec =
3225 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3226 mask = clamp_mask_to_max_output_vertices(bld, mask,
3227 total_emitted_vertices_vec);
3228 gather_outputs(bld);
3229 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
3230 bld->outputs,
3231 total_emitted_vertices_vec);
3232 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3233 mask);
3234 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3235 mask);
3236 #if DUMP_GS_EMITS
3237 lp_build_print_value(bld->bld_base.base.gallivm,
3238 " +++ emit vertex masked ones = ",
3239 mask);
3240 lp_build_print_value(bld->bld_base.base.gallivm,
3241 " +++ emit vertex emitted = ",
3242 total_emitted_vertices_vec);
3243 #endif
3244 }
3245 }
3246
3247
3248 static void
3249 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3250 LLVMValueRef mask)
3251 {
3252 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3253 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3254
3255 if (bld->gs_iface->end_primitive) {
3256 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3257 LLVMValueRef emitted_vertices_vec =
3258 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
3259 LLVMValueRef emitted_prims_vec =
3260 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3261
3262 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3263 emitted_vertices_vec,
3264 uint_bld->zero);
3265 /* We need to combine the current execution mask with the mask
3266 telling us which, if any, execution slots actually have
3267 unemitted primitives; this way we make sure that end_primitive
3268 executes only on the paths that have unflushed vertices. */
3269 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
3270
3271 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
3272 emitted_vertices_vec,
3273 emitted_prims_vec);
3274
3275 #if DUMP_GS_EMITS
3276 lp_build_print_value(bld->bld_base.base.gallivm,
3277 " +++ end prim masked ones = ",
3278 mask);
3279 lp_build_print_value(bld->bld_base.base.gallivm,
3280 " +++ end prim emitted verts1 = ",
3281 emitted_vertices_vec);
3282 lp_build_print_value(bld->bld_base.base.gallivm,
3283 " +++ end prim emitted prims1 = ",
3284 LLVMBuildLoad(builder,
3285 bld->emitted_prims_vec_ptr, ""));
3286 #endif
3287 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
3288 mask);
3289 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
3290 mask);
3291 #if DUMP_GS_EMITS
3292 lp_build_print_value(bld->bld_base.base.gallivm,
3293 " +++ end prim emitted verts2 = ",
3294 LLVMBuildLoad(builder,
3295 bld->emitted_vertices_vec_ptr, ""));
3296 #endif
3297 }
3298
3299 }
3300
3301 static void
3302 end_primitive(
3303 const struct lp_build_tgsi_action * action,
3304 struct lp_build_tgsi_context * bld_base,
3305 struct lp_build_emit_data * emit_data)
3306 {
3307 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3308
3309 if (bld->gs_iface->end_primitive) {
3310 LLVMValueRef mask = mask_vec(bld_base);
3311 end_primitive_masked(bld_base, mask);
3312 }
3313 }
3314
3315 static void
3316 cal_emit(
3317 const struct lp_build_tgsi_action * action,
3318 struct lp_build_tgsi_context * bld_base,
3319 struct lp_build_emit_data * emit_data)
3320 {
3321 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3322
3323 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3324 &bld_base->pc);
3325 }
3326
3327 static void
3328 ret_emit(
3329 const struct lp_build_tgsi_action * action,
3330 struct lp_build_tgsi_context * bld_base,
3331 struct lp_build_emit_data * emit_data)
3332 {
3333 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3334
3335 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3336 }
3337
3338 static void
3339 brk_emit(
3340 const struct lp_build_tgsi_action * action,
3341 struct lp_build_tgsi_context * bld_base,
3342 struct lp_build_emit_data * emit_data)
3343 {
3344 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3345
3346 lp_exec_break(&bld->exec_mask, bld_base);
3347 }
3348
3349 static void
3350 breakc_emit(
3351 const struct lp_build_tgsi_action * action,
3352 struct lp_build_tgsi_context * bld_base,
3353 struct lp_build_emit_data * emit_data)
3354 {
3355 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3356 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3357 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3358 LLVMValueRef unsigned_cond =
3359 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
3360 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3361 unsigned_cond,
3362 uint_bld->zero);
3363
3364 lp_exec_break_condition(&bld->exec_mask, cond);
3365 }
3366
3367 static void
3368 if_emit(
3369 const struct lp_build_tgsi_action * action,
3370 struct lp_build_tgsi_context * bld_base,
3371 struct lp_build_emit_data * emit_data)
3372 {
3373 LLVMValueRef tmp;
3374 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3375
3376 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3377 emit_data->args[0], bld->bld_base.base.zero);
3378 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3379 }
3380
3381 static void
3382 uif_emit(
3383 const struct lp_build_tgsi_action * action,
3384 struct lp_build_tgsi_context * bld_base,
3385 struct lp_build_emit_data * emit_data)
3386 {
3387 LLVMValueRef tmp;
3388 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3389 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3390
3391 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3392 emit_data->args[0], uint_bld->zero);
3393 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3394 }
3395
3396 static void
3397 case_emit(
3398 const struct lp_build_tgsi_action * action,
3399 struct lp_build_tgsi_context * bld_base,
3400 struct lp_build_emit_data * emit_data)
3401 {
3402 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3403
3404 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3405 }
3406
3407 static void
3408 default_emit(
3409 const struct lp_build_tgsi_action * action,
3410 struct lp_build_tgsi_context * bld_base,
3411 struct lp_build_emit_data * emit_data)
3412 {
3413 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3414
3415 lp_exec_default(&bld->exec_mask, bld_base);
3416 }
3417
3418 static void
3419 switch_emit(
3420 const struct lp_build_tgsi_action * action,
3421 struct lp_build_tgsi_context * bld_base,
3422 struct lp_build_emit_data * emit_data)
3423 {
3424 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3425
3426 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3427 }
3428
3429 static void
3430 endswitch_emit(
3431 const struct lp_build_tgsi_action * action,
3432 struct lp_build_tgsi_context * bld_base,
3433 struct lp_build_emit_data * emit_data)
3434 {
3435 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3436
3437 lp_exec_endswitch(&bld->exec_mask, bld_base);
3438 }
3439
3440 static void
3441 bgnloop_emit(
3442 const struct lp_build_tgsi_action * action,
3443 struct lp_build_tgsi_context * bld_base,
3444 struct lp_build_emit_data * emit_data)
3445 {
3446 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3447
3448 lp_exec_bgnloop(&bld->exec_mask);
3449 }
3450
3451 static void
3452 bgnsub_emit(
3453 const struct lp_build_tgsi_action * action,
3454 struct lp_build_tgsi_context * bld_base,
3455 struct lp_build_emit_data * emit_data)
3456 {
3457 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3458
3459 lp_exec_mask_bgnsub(&bld->exec_mask);
3460 }
3461
3462 static void
3463 else_emit(
3464 const struct lp_build_tgsi_action * action,
3465 struct lp_build_tgsi_context * bld_base,
3466 struct lp_build_emit_data * emit_data)
3467 {
3468 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3469
3470 lp_exec_mask_cond_invert(&bld->exec_mask);
3471 }
3472
3473 static void
3474 endif_emit(
3475 const struct lp_build_tgsi_action * action,
3476 struct lp_build_tgsi_context * bld_base,
3477 struct lp_build_emit_data * emit_data)
3478 {
3479 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3480
3481 lp_exec_mask_cond_pop(&bld->exec_mask);
3482 }
3483
3484 static void
3485 endloop_emit(
3486 const struct lp_build_tgsi_action * action,
3487 struct lp_build_tgsi_context * bld_base,
3488 struct lp_build_emit_data * emit_data)
3489 {
3490 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3491
3492 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3493 }
3494
3495 static void
3496 endsub_emit(
3497 const struct lp_build_tgsi_action * action,
3498 struct lp_build_tgsi_context * bld_base,
3499 struct lp_build_emit_data * emit_data)
3500 {
3501 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3502
3503 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3504 }
3505
3506 static void
3507 cont_emit(
3508 const struct lp_build_tgsi_action * action,
3509 struct lp_build_tgsi_context * bld_base,
3510 struct lp_build_emit_data * emit_data)
3511 {
3512 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3513
3514 lp_exec_continue(&bld->exec_mask);
3515 }
3516
3517 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
3518 {
3519 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3520 struct gallivm_state * gallivm = bld_base->base.gallivm;
3521
3522 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
3523 LLVMValueRef array_size =
3524 lp_build_const_int32(gallivm,
3525 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
3526 bld->temps_array = lp_build_array_alloca(gallivm,
3527 bld_base->base.vec_type, array_size,
3528 "temp_array");
3529 }
3530
3531 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
3532 LLVMValueRef array_size =
3533 lp_build_const_int32(gallivm,
3534 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
3535 bld->outputs_array = lp_build_array_alloca(gallivm,
3536 bld_base->base.vec_type, array_size,
3537 "output_array");
3538 }
3539
3540 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3541 LLVMValueRef array_size =
3542 lp_build_const_int32(gallivm,
3543 bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
3544 bld->imms_array = lp_build_array_alloca(gallivm,
3545 bld_base->base.vec_type, array_size,
3546 "imms_array");
3547 }
3548
3549 /* If we have indirect addressing of inputs, we need to copy them into
3550 * our alloca array to be able to index into them. */
3551 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
3552 unsigned index, chan;
3553 LLVMTypeRef vec_type = bld_base->base.vec_type;
3554 LLVMValueRef array_size = lp_build_const_int32(gallivm,
3555 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
3556 bld->inputs_array = lp_build_array_alloca(gallivm,
3557 vec_type, array_size,
3558 "input_array");
3559
3560 assert(bld_base->info->num_inputs
3561 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
3562
3563 for (index = 0; index < bld_base->info->num_inputs; ++index) {
3564 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
3565 LLVMValueRef lindex =
3566 lp_build_const_int32(gallivm, index * 4 + chan);
3567 LLVMValueRef input_ptr =
3568 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
3569 &lindex, 1, "");
3570 LLVMValueRef value = bld->inputs[index][chan];
3571 if (value)
3572 LLVMBuildStore(gallivm->builder, value, input_ptr);
3573 }
3574 }
3575 }
3576
   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}

static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   /* If we have indirect addressing in outputs, we need to copy our alloca
    * array back to the output slots specified by the caller; that happens
    * in gather_outputs() on the non-GS path below. */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitives, needed in case there are any unflushed
         vertices in the cache.  Note we must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      gather_outputs(bld);
   }
}

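/**
 * Translate a TGSI shader into LLVM IR, SoA style: each channel of each
 * TGSI register becomes one vector holding that channel for type.length
 * executions at once.
 *
 * A rough caller's sketch (the locals here are hypothetical; see the
 * llvmpipe shader setup code for real callers):
 *
 *    LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
 *    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
 *    ...
 *    lp_build_tgsi_soa(gallivm, tokens, lp_type_float_vec(32, 128),
 *                      mask, consts_ptr, const_sizes_ptr, &system_values,
 *                      inputs, outputs, context_ptr,
 *                      sampler, &info, NULL);
 */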
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * them like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons immediates are always backed by a static
    * array, but if there are too many of them we have to fall back to
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
      (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }
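
   /*
    * In either spilled case the backing arrays themselves are allocated
    * later, in emit_prologue(), once code emission has begun.
    */
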
   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;

   if (gs_iface) {
      /* There's no default value for this; it should always be set, but
       * apps using ext_geometry_shader4 quite often forgot to set it,
       * so rather than assert we fall back to MAX_VERTEX_VARYING from
       * that spec. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
         info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;  /* MAX_VERTEX_VARYING fallback */

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

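   /*
    * The exec mask is how SoA control flow works: IF/LOOP and friends
    * update per-channel masks so all channels can share one instruction
    * stream, with stores masked accordingly.
    */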
   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

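   /*
    * Walk the TGSI token stream: this drives the emit callbacks set up
    * above, calling emit_prologue() first and emit_epilogue() last.
    */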
   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("--- begin shader dump ---\n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("--- end shader dump ---\n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }

   lp_exec_mask_fini(&bld.exec_mask);
}