gallivm: simplify sampler interface
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "tgsi/tgsi_strings.h"
51 #include "lp_bld_tgsi_action.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_bitarit.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_init.h"
58 #include "lp_bld_logic.h"
59 #include "lp_bld_swizzle.h"
60 #include "lp_bld_flow.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_tgsi.h"
63 #include "lp_bld_limits.h"
64 #include "lp_bld_debug.h"
65 #include "lp_bld_printf.h"
66 #include "lp_bld_sample.h"
67 #include "lp_bld_struct.h"
68
69 /* SM 4.0 says that subroutines can nest 32 deep and
70 * we need one more for our main function */
71 #define LP_MAX_NUM_FUNCS 33
72
73 #define DUMP_GS_EMITS 0
74
75 /*
76 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
77 * instruction.
78 *
79 * TODO:
80 * - take execution masks in consideration
81 * - debug control-flow instructions
82 */
83 #define DEBUG_EXECUTION 0
84
85
86 /*
87 * Emit code to print a register value.
88 */
89 static void
90 emit_dump_reg(struct gallivm_state *gallivm,
91 unsigned file,
92 unsigned index,
93 unsigned chan,
94 LLVMValueRef value)
95 {
96 char buf[32];
97
98 util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
99 tgsi_file_name(file),
100 index, "xyzw"[chan]);
101
102 lp_build_print_value(gallivm, buf, value);
103 }
104
105 /*
106 * Return the context for the current function.
107 * (always 'main', if shader doesn't do any function calls)
108 */
109 static INLINE struct function_ctx *
110 func_ctx(struct lp_exec_mask *mask)
111 {
112 assert(mask->function_stack_size > 0);
113 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
114 return &mask->function_stack[mask->function_stack_size - 1];
115 }
116
117 /*
118 * Returns true if we're in a loop.
119 * It's global, meaning that it returns true even if there's
120 * no loop inside the current function, but we were inside
121 * a loop inside another function, from which this one was called.
122 */
123 static INLINE boolean
124 mask_has_loop(struct lp_exec_mask *mask)
125 {
126 int i;
127 for (i = mask->function_stack_size - 1; i >= 0; --i) {
128 const struct function_ctx *ctx = &mask->function_stack[i];
129 if (ctx->loop_stack_size > 0)
130 return TRUE;
131 }
132 return FALSE;
133 }
134
135 /*
136 * Returns true if we're inside a switch statement.
137 * It's global, meaning that it returns true even if there's
138 * no switch in the current function, but we were inside
139 * a switch inside another function, from which this one was called.
140 */
141 static INLINE boolean
142 mask_has_switch(struct lp_exec_mask *mask)
143 {
144 int i;
145 for (i = mask->function_stack_size - 1; i >= 0; --i) {
146 const struct function_ctx *ctx = &mask->function_stack[i];
147 if (ctx->switch_stack_size > 0)
148 return TRUE;
149 }
150 return FALSE;
151 }
152
153 /*
154 * Returns true if we're inside a conditional.
155 * It's global, meaning that it returns true even if there's
156 * no conditional in the current function, but we were inside
157 * a conditional inside another function, from which this one was called.
158 */
159 static INLINE boolean
160 mask_has_cond(struct lp_exec_mask *mask)
161 {
162 int i;
163 for (i = mask->function_stack_size - 1; i >= 0; --i) {
164 const struct function_ctx *ctx = &mask->function_stack[i];
165 if (ctx->cond_stack_size > 0)
166 return TRUE;
167 }
168 return FALSE;
169 }
170
171
/*
 * Initialize a function context at the specified index
 * (a fresh slot on the call stack: empty cond/loop/switch nesting
 * plus a fresh loop iteration limiter).
 */
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->switch_stack_size = 0;

   if (function_idx == 0) {
      /* main() starts with the initial (all-ones) return mask. */
      ctx->ret_mask = mask->ret_mask;
   }

   /* Runtime counter guarding against infinite TGSI loops; it is
    * decremented at every ENDLOOP and terminates the loop when
    * exhausted (see lp_exec_endloop()). */
   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}
197
/*
 * Initialize the execution-mask state for a shader: all partial masks
 * start fully enabled (all ones) and the function-context stack is
 * allocated with slot 0 set up for main().
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   /* All lanes initially active in every partial mask. */
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   /* Freed in lp_exec_mask_fini(). */
   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}
215
/*
 * Release the function-context stack allocated by lp_exec_mask_init().
 */
static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}
221
/*
 * Recompute exec_mask as the AND of all the partial masks that can
 * currently be something other than all-ones (cond, loop break/cont,
 * switch, return).  Masks that are statically known to be all-ones are
 * skipped entirely so no IR is emitted for them.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask = mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   /* The return mask matters inside subroutines, or after a RET in main. */
   boolean has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   /* has_mask tells masked stores whether exec_mask must be honored. */
   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}
265
/*
 * Enter an IF: save the current conditional mask and AND the new
 * condition into it.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Nesting too deep to save state: just keep counting pushes so the
    * matching pops stay balanced (results will be wrong, but we don't
    * crash). */
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      /* Outermost conditional in main: mask must still be all-ones. */
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}
287
/*
 * Handle ELSE: the new conditional mask is the complement of the
 * current one, still limited by the mask saved at the enclosing IF.
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   /* Over-deep nesting was never saved, nothing to invert against. */
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
310
/*
 * Handle ENDIF: restore the conditional mask saved by the matching
 * lp_exec_mask_cond_push().
 */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   /* Pops of over-deep (unsaved) levels only adjust the counter. */
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}
321
/*
 * Handle BGNLOOP: save the enclosing loop's state, reset break/continue
 * bookkeeping and branch into a fresh "bgnloop" basic block that is the
 * target of the loop back-edge.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Too deep: only track depth so the ENDLOOPs stay balanced. */
   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   /* break_type tells BRK whether it targets a loop or a switch;
    * the combined loop+switch depth indexes the save stack. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   /* break_mask lives in memory (break_var) because it must be
    * preserved across the loop back-edge. */
   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}
354
/*
 * Handle BRK: depending on ctx->break_type this breaks out of the
 * innermost loop or the innermost switch, by clearing the active lanes
 * from the corresponding mask.
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* Loop break: disable currently-active lanes in break_mask. */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* Switch break: peek at the next instruction to see if this BRK
       * unconditionally terminates the current case. */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (ctx->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && ctx->switch_pc) {
            /* Jump back to where the deferred default was entered from. */
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         /* All lanes leave the switch - no need to compute a mask. */
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}
404
/*
 * Handle BREAKC: conditional break - lanes where both the execution
 * mask and the given condition are set leave the innermost loop or
 * switch.
 */
static void lp_exec_break_condition(struct lp_exec_mask *mask,
                                    LLVMValueRef cond)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef cond_mask = LLVMBuildAnd(builder,
                                         mask->exec_mask,
                                         cond, "cond_mask");
   /* Invert: lanes that break get cleared from the target mask. */
   cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      cond_mask, "breakc_full");
   }
   else {
      mask->switch_mask = LLVMBuildAnd(builder,
                                       mask->switch_mask,
                                       cond_mask, "breakc_switch");
   }

   lp_exec_mask_update(mask);
}
428
429 static void lp_exec_continue(struct lp_exec_mask *mask)
430 {
431 LLVMBuilderRef builder = mask->bld->gallivm->builder;
432 LLVMValueRef exec_mask = LLVMBuildNot(builder,
433 mask->exec_mask,
434 "");
435
436 mask->cont_mask = LLVMBuildAnd(builder,
437 mask->cont_mask,
438 exec_mask, "");
439
440 lp_exec_mask_update(mask);
441 }
442
443
/*
 * Handle ENDLOOP: emit the loop back-edge, taken while any lane is
 * still active and the iteration limiter is not exhausted, then pop
 * and restore the enclosing loop's state.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* One wide integer covering the whole mask vector, so "any lane
    * active" is a single scalar compare. */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);


   assert(ctx->loop_stack_size);
   /* Over-deep (unsaved) nesting: only unwind the counter. */
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   /* Back-edge to the loop header, or fall through to endloop. */
   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   /* Pop and restore the enclosing loop's state. */
   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
         ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
523
/*
 * Handle SWITCH: save the enclosing switch's state and start with an
 * all-zero switch mask - lanes only become active again when a CASE
 * matches (or via DEFAULT).
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   /* Too deep: only track depth so the ENDSWITCHs stay balanced. */
   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   /* break_type tells BRK whether it targets a loop or a switch. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   /* No lane active until a CASE matches. */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   /* switch_mask_default accumulates lanes taken by any CASE, so
    * DEFAULT can later claim the complement. */
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
554
/*
 * Handle ENDSWITCH: if a DEFAULT was deferred (it wasn't the last case
 * and had no fallthrough into it), jump back and execute it now;
 * otherwise pop and restore the enclosing switch's state.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Over-deep (unsaved) nesting: only unwind the counter. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's deferred default if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* DEFAULT gets the lanes no CASE claimed. */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      /* Rewind the interpreter to the DEFAULT body. */
      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }

   else if (ctx->switch_pc && ctx->switch_in_default) {
      /* Second visit: we just finished executing the deferred default. */
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   /* Pop and restore the enclosing switch's state. */
   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
606
/*
 * Handle CASE: enable the lanes whose switch value matches caseval
 * (in addition to lanes falling through from the previous case).
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   /* Over-deep nesting was never saved; nothing sensible to do. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      /* Remember which lanes any CASE claimed, for DEFAULT later. */
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      /* OR in currently-active lanes to implement fallthrough. */
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
631
/*
 * Analyse default statement in a switch.
 * \return true if default is last statement, false otherwise
 * \param default_pc_start contains pc of instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   unsigned curr_switch_stack = ctx->switch_stack_size;

   /* Over-deep nesting: state is unreliable, bail out. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   /* Scan forward for the next CASE/ENDSWITCH at the same nesting level.
    * NOTE(review): pc is unsigned, so "pc != -1" compares against
    * UINT_MAX; in practice only the num_instructions bound terminates
    * the scan - confirm the sentinel is intentional. */
   while (pc != -1 && pc < bld_base->num_instructions) {
      unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}
682
/*
 * Handle DEFAULT: if it is the last statement of the switch, enable the
 * lanes no CASE claimed right away; otherwise defer its execution until
 * ENDSWITCH (recording the pc), to get CASE-fallthrough semantics right.
 */
static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   /* Over-deep nesting was never saved; nothing sensible to do. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not be always at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is last statement in switch (note that case statements appearing
    * "at the same time" as default don't change that) everything is just fine,
    * update switch mask and go on. This means we can handle default with
    * fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* DEFAULT gets the complement of all lanes any CASE claimed... */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      /* ...plus the lanes falling through from the previous case. */
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not last statement and there was no fallthrough into it,
       * we record the PC and continue execution at next case (again, those
       * case encountered at the same time don't count). At endswitch
       * time, we update switchmask, and go back executing the code we skipped
       * until the next break (possibly re-executing some code with changed mask
       * if there was a fallthrough out of default).
       * Finally, if it is not last statement and there was a fallthrough into it,
       * do the same as with the former case, except instead of skipping the code
       * just execute it without updating the mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
747
748
/* stores val into an address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 * \param bld_store  build context matching the type of val
 * \param pred       optional per-lane predicate, ANDed with exec_mask
 * \param val        vector value to (partially) store
 * \param dst_ptr    pointer to a vector of the same type as val
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      /* Masked store: load-select-store read-modify-write. */
      LLVMValueRef res, dst;

      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, pred, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      /* No active mask: plain full-vector store. */
      LLVMBuildStore(builder, val, dst_ptr);
}
784
/*
 * Handle CAL: push a new function context recording the return pc and
 * the caller's ret_mask, then redirect execution to the subroutine.
 * If the call stack is exhausted the call is silently skipped.
 */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}
799
/*
 * Handle RET: from top-level main() this ends translation outright;
 * otherwise the currently-active lanes are removed from ret_mask so
 * they skip the rest of the function.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
834
/*
 * Handle BGNSUB: intentionally a no-op - all call bookkeeping is done
 * in lp_exec_mask_call() / lp_exec_mask_endsub().
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
838
/*
 * Handle ENDSUB: pop the function context, restoring the caller's
 * program counter and return mask.
 */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   /* Must be inside a subroutine, not main(). */
   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}
854
855
/*
 * Return a pointer (alloca) to the given channel of a TEMP or OUTPUT
 * register.  When the register file uses indirect addressing the whole
 * file lives in one flat array indexed as index*4+chan; otherwise each
 * channel has its own variable.
 */
static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             unsigned index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      /* Flat-array layout: GEP into the single backing array. */
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
   }
   else {
      /* Per-channel allocas: direct lookup. */
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}
891
892
/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan   which channel of the temp register.
 * \return  pointer to the channel's storage (see get_file_ptr())
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}
906
/**
 * Return pointer to a output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan   which channel of the output register.
 * \return  pointer to the channel's storage (see get_file_ptr())
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}
920
/*
 * If we have indirect addressing in outputs copy our alloca array
 * to the outputs slots specified by the caller to make sure
 * our outputs are delivered consistently via the same interface.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
      unsigned index, chan;
      assert(bld->bld_base.info->num_outputs <=
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
      /* Re-derive per-channel pointers into the flat outputs array. */
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}
940
/**
 * Gather vector.
 * Loads one scalar per lane from base_ptr[indexes[i]] and assembles the
 * result vector; lanes flagged in overflow_mask read as 0.0 instead.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 * \param base_ptr       pointer to the array of scalars to gather from
 * \param indexes        per-lane integer indices into base_ptr
 * \param overflow_mask  optional per-lane boolean vector: out-of-bounds
 *                       lanes yield zero (constant-buffer semantics)
 */
static LLVMValueRef
build_gather(struct lp_build_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef *overflow_mask)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef res = bld->undef;
   unsigned i;
   LLVMValueRef temp_ptr = NULL;

   /* With an overflow mask the result is accumulated in memory, since
    * each lane needs its own if/else around the load. */
   if (overflow_mask) {
      temp_ptr = lp_build_alloca(
         bld->gallivm,
         lp_build_vec_type(bld->gallivm, bld->type), "");
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr, scalar;
      LLVMValueRef overflow;
      struct lp_build_if_state if_ctx;

      /*
       * overflow_mask is a boolean vector telling us which channels
       * in the vector overflowed. We use the overflow behavior for
       * constant buffers which is defined as:
       * Out of bounds access to constant buffer returns 0 in all
       * componenets. Out of bounds behavior is always with respect
       * to the size of the buffer bound at that slot.
       */
      if (overflow_mask) {
         overflow = LLVMBuildExtractElement(builder, *overflow_mask,
                                            ii, "");
         lp_build_if(&if_ctx, bld->gallivm, overflow);
         {
            /* Overflowed lane: insert 0.0 instead of loading. */
            LLVMValueRef val = LLVMBuildLoad(builder, temp_ptr, "");
            val = LLVMBuildInsertElement(
               builder, val,
               LLVMConstNull(LLVMFloatTypeInContext(bld->gallivm->context)),
               ii, "");
            LLVMBuildStore(builder, val, temp_ptr);
         }
         lp_build_else(&if_ctx);
         {
            /* In-bounds lane: load the scalar and insert it. */
            LLVMValueRef val = LLVMBuildLoad(builder, temp_ptr, "");

            scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                      &index, 1, "gather_ptr");
            scalar = LLVMBuildLoad(builder, scalar_ptr, "");

            val = LLVMBuildInsertElement(builder, val, scalar, ii, "");

            LLVMBuildStore(builder, val, temp_ptr);
         }
         lp_build_endif(&if_ctx);
      } else {
         /* No overflow handling: straight load and insert. */
         scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                   &index, 1, "gather_ptr");
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");

         res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
      }
   }

   if (overflow_mask) {
      /* Read back the vector accumulated in memory. */
      res = LLVMBuildLoad(builder, temp_ptr, "gather_val");
   }

   return res;
}
1022
1023
1024 /**
1025 * Scatter/store vector.
1026 */
1027 static void
1028 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
1029 LLVMValueRef base_ptr,
1030 LLVMValueRef indexes,
1031 LLVMValueRef values,
1032 struct lp_exec_mask *mask,
1033 LLVMValueRef pred)
1034 {
1035 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1036 LLVMBuilderRef builder = gallivm->builder;
1037 unsigned i;
1038
1039 /* Mix the predicate and execution mask */
1040 if (mask->has_mask) {
1041 if (pred) {
1042 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
1043 }
1044 else {
1045 pred = mask->exec_mask;
1046 }
1047 }
1048
1049 /*
1050 * Loop over elements of index_vec, store scalar value.
1051 */
1052 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1053 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1054 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
1055 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
1056 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
1057 LLVMValueRef scalar_pred = pred ?
1058 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
1059
1060 if (0)
1061 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
1062 ii, val, index, scalar_ptr);
1063
1064 if (scalar_pred) {
1065 LLVMValueRef real_val, dst_val;
1066 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
1067 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
1068 LLVMBuildStore(builder, real_val, scalar_ptr);
1069 }
1070 else {
1071 LLVMBuildStore(builder, val, scalar_ptr);
1072 }
1073 }
1074 }
1075
1076
1077 /**
1078 * Read the current value of the ADDR register, convert the floats to
1079 * ints, add the base index and return the vector of offsets.
1080 * The offsets will be used to index into the constant buffer or
1081 * temporary register file.
1082 */
1083 static LLVMValueRef
1084 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
1085 unsigned reg_file, unsigned reg_index,
1086 const struct tgsi_ind_register *indirect_reg)
1087 {
1088 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1089 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
1090 /* always use X component of address register */
1091 unsigned swizzle = indirect_reg->Swizzle;
1092 LLVMValueRef base;
1093 LLVMValueRef rel;
1094 LLVMValueRef max_index;
1095 LLVMValueRef index;
1096
1097 assert(bld->indirect_files & (1 << reg_file));
1098
1099 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
1100
1101 assert(swizzle < 4);
1102 switch (indirect_reg->File) {
1103 case TGSI_FILE_ADDRESS:
1104 rel = LLVMBuildLoad(builder,
1105 bld->addr[indirect_reg->Index][swizzle],
1106 "load addr reg");
1107 /* ADDR LLVM values already have LLVM integer type. */
1108 break;
1109 case TGSI_FILE_TEMPORARY:
1110 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
1111 rel = LLVMBuildLoad(builder, rel, "load temp reg");
1112 /* TEMP LLVM values always have LLVM float type, but for indirection, the
1113 * value actually stored is expected to be an integer */
1114 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
1115 break;
1116 default:
1117 assert(0);
1118 rel = uint_bld->zero;
1119 }
1120
1121 index = lp_build_add(uint_bld, base, rel);
1122
1123 /*
1124 * emit_fetch_constant handles constant buffer overflow so this code
1125 * is pointless for them.
1126 * Furthermore the D3D10 spec in section 6.5 says:
1127 * If the constant buffer bound to a slot is larger than the size
1128 * declared in the shader for that slot, implementations are allowed
1129 * to return incorrect data (not necessarily 0) for indices that are
1130 * larger than the declared size but smaller than the buffer size.
1131 */
1132 if (reg_file != TGSI_FILE_CONSTANT) {
1133 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
1134 uint_bld->type,
1135 bld->bld_base.info->file_max[reg_file]);
1136
1137 assert(!uint_bld->type.sign);
1138 index = lp_build_min(uint_bld, index, max_index);
1139 }
1140
1141 return index;
1142 }
1143
1144 static struct lp_build_context *
1145 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1146 enum tgsi_opcode_type stype)
1147 {
1148 struct lp_build_context *bld_fetch;
1149
1150 switch (stype) {
1151 case TGSI_TYPE_FLOAT:
1152 case TGSI_TYPE_UNTYPED:
1153 bld_fetch = &bld_base->base;
1154 break;
1155 case TGSI_TYPE_UNSIGNED:
1156 bld_fetch = &bld_base->uint_bld;
1157 break;
1158 case TGSI_TYPE_SIGNED:
1159 bld_fetch = &bld_base->int_bld;
1160 break;
1161 case TGSI_TYPE_VOID:
1162 case TGSI_TYPE_DOUBLE:
1163 default:
1164 assert(0);
1165 bld_fetch = NULL;
1166 break;
1167 }
1168 return bld_fetch;
1169 }
1170
1171 static LLVMValueRef
1172 get_soa_array_offsets(struct lp_build_context *uint_bld,
1173 LLVMValueRef indirect_index,
1174 unsigned chan_index,
1175 boolean need_perelement_offset)
1176 {
1177 struct gallivm_state *gallivm = uint_bld->gallivm;
1178 LLVMValueRef chan_vec =
1179 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1180 LLVMValueRef length_vec =
1181 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1182 LLVMValueRef index_vec;
1183
1184 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1185 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1186 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1187 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1188
1189 if (need_perelement_offset) {
1190 LLVMValueRef pixel_offsets;
1191 int i;
1192 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1193 pixel_offsets = uint_bld->undef;
1194 for (i = 0; i < uint_bld->type.length; i++) {
1195 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1196 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1197 ii, ii, "");
1198 }
1199 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1200 }
1201 return index_vec;
1202 }
1203
/**
 * Fetch one channel of a constant-buffer source register as a SoA vector.
 *
 * Direct accesses load one scalar and broadcast it; indirect accesses
 * gather per-lane and return 0 for lanes that index past the bound
 * buffer size (D3D10 out-of-bounds behavior).
 *
 * \param stype    value type the instruction expects
 * \param swizzle  which channel (0..3) of the register to fetch
 * \return the fetched vector, bitcast to match stype
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0);

   if (reg->Register.Dimension) {
      /* 2D register: the dimension selects the constant buffer slot */
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = LLVMBuildICmp(builder, LLVMIntUGE,
                                    indirect_index,
                                    num_consts, "");

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      /* Gather values from the constant buffer */
      res = build_gather(&bld_base->base, consts_ptr, index_vec,
                         &overflow_mask);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;

      /* constants are laid out as 4 floats per register */
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      /* same value in all lanes */
      res = lp_build_broadcast_scalar(&bld_base->base, scalar);
   }

   /* constants are stored as floats; reinterpret for integer consumers */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1281
/**
 * Fetch one channel of an immediate source register as a SoA vector.
 *
 * Immediates normally live in the bld->immediates cache; when stored as
 * an array (use_immediates_array, or when accessed indirectly) they are
 * read back from bld->imms_array instead.
 *
 * \param stype    value type the instruction expects
 * \param swizzle  which channel (0..3) of the immediate to fetch
 * \return the fetched vector, bitcast to match stype
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */

         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);

         /* Gather values from the immediate register array */
         res = build_gather(&bld_base->base, imms_array, index_vec, NULL);
      } else {
         /* direct access into the array: registers are 4 channels wide */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");
      }
   }
   else {
      /* fast path: immediate was cached as an LLVM value at decl time */
      res = bld->immediates[reg->Register.Index][swizzle];
   }

   /* immediates are stored as floats; reinterpret for integer consumers */
   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }
   return res;
}
1342
/**
 * Fetch one channel of an input register as a SoA vector.
 *
 * Inputs come from bld->inputs unless the input file is set up for
 * indirect addressing, in which case they are read from the
 * bld->inputs_array backing store.
 *
 * \param stype    value type the instruction expects
 * \param swizzle  which channel (0..3) of the input to fetch
 * \return the fetched vector, bitcast to match stype
 */
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* per-lane offsets needed: each lane reads its own slot */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);

      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(&bld_base->base, inputs_array, index_vec, NULL);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* inputs are mirrored in an array; read the backing store */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, input_ptr, "");
      }
      else {
         /* fast path: input was cached as an LLVM value up front */
         res = bld->inputs[reg->Register.Index][swizzle];
      }
   }

   assert(res);

   /* inputs are stored as floats; reinterpret for integer consumers */
   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1400
1401
/**
 * Fetch one channel of a geometry shader input register.
 *
 * GS inputs are 2D (vertex x attribute), so both the vertex index and
 * the attribute index may be static or indirect; the actual fetch is
 * delegated to the gs_iface callback.  PRIMID is special-cased as a
 * system value.
 *
 * \param stype    value type the instruction expects
 * \param swizzle  which channel (0..3) of the input to fetch
 * \return the fetched vector, bitcast to match stype
 */
static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      /* prim_id is integer-typed; bitcast only for float consumers */
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   /* attribute index: dynamic or static */
   if (reg->Register.Indirect) {
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   /* vertex index (the 2D dimension): dynamic or static */
   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   /* let the GS interface implementation do the actual fetch */
   res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);

   /* inputs are stored as floats; reinterpret for integer consumers */
   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1464
/**
 * Fetch one channel of a temporary register as a SoA vector.
 *
 * Direct accesses load from the per-channel alloca; indirect accesses
 * gather per-lane from the flat temps_array backing store.
 *
 * \param stype    value type the instruction expects
 * \param swizzle  which channel (0..3) of the temp to fetch
 * \return the fetched vector, bitcast to match stype
 */
static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* per-lane offsets needed: each lane reads its own slot */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(&bld_base->base, temps_array, index_vec, NULL);
   }
   else {
      /* direct access: load the whole vector from the channel's alloca */
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");
   }

   /* temps are stored as floats; reinterpret for integer consumers */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1513
1514 static LLVMValueRef
1515 emit_fetch_system_value(
1516 struct lp_build_tgsi_context * bld_base,
1517 const struct tgsi_full_src_register * reg,
1518 enum tgsi_opcode_type stype,
1519 unsigned swizzle)
1520 {
1521 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1522 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1523 const struct tgsi_shader_info *info = bld->bld_base.info;
1524 LLVMBuilderRef builder = gallivm->builder;
1525 LLVMValueRef res;
1526 enum tgsi_opcode_type atype; // Actual type of the value
1527
1528 assert(!reg->Register.Indirect);
1529
1530 switch (info->system_value_semantic_name[reg->Register.Index]) {
1531 case TGSI_SEMANTIC_INSTANCEID:
1532 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1533 atype = TGSI_TYPE_UNSIGNED;
1534 break;
1535
1536 case TGSI_SEMANTIC_VERTEXID:
1537 res = bld->system_values.vertex_id;
1538 atype = TGSI_TYPE_UNSIGNED;
1539 break;
1540
1541 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1542 res = bld->system_values.vertex_id_nobase;
1543 atype = TGSI_TYPE_UNSIGNED;
1544 break;
1545
1546 case TGSI_SEMANTIC_BASEVERTEX:
1547 res = bld->system_values.basevertex;
1548 atype = TGSI_TYPE_UNSIGNED;
1549 break;
1550
1551 case TGSI_SEMANTIC_PRIMID:
1552 res = bld->system_values.prim_id;
1553 atype = TGSI_TYPE_UNSIGNED;
1554 break;
1555
1556 default:
1557 assert(!"unexpected semantic in emit_fetch_system_value");
1558 res = bld_base->base.zero;
1559 atype = TGSI_TYPE_FLOAT;
1560 break;
1561 }
1562
1563 if (atype != stype) {
1564 if (stype == TGSI_TYPE_FLOAT) {
1565 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1566 } else if (stype == TGSI_TYPE_UNSIGNED) {
1567 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1568 } else if (stype == TGSI_TYPE_SIGNED) {
1569 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1570 }
1571 }
1572
1573 return res;
1574 }
1575
1576 /**
1577 * Register fetch with derivatives.
1578 */
1579 static void
1580 emit_fetch_deriv(
1581 struct lp_build_tgsi_soa_context *bld,
1582 LLVMValueRef src,
1583 LLVMValueRef *res,
1584 LLVMValueRef *ddx,
1585 LLVMValueRef *ddy)
1586 {
1587 if(res)
1588 *res = src;
1589
1590 /* TODO: use interpolation coeffs for inputs */
1591
1592 if(ddx)
1593 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1594
1595 if(ddy)
1596 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1597 }
1598
1599
1600 /**
1601 * Predicate.
1602 */
1603 static void
1604 emit_fetch_predicate(
1605 struct lp_build_tgsi_soa_context *bld,
1606 const struct tgsi_full_instruction *inst,
1607 LLVMValueRef *pred)
1608 {
1609 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1610 unsigned index;
1611 unsigned char swizzles[4];
1612 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
1613 LLVMValueRef value;
1614 unsigned chan;
1615
1616 if (!inst->Instruction.Predicate) {
1617 TGSI_FOR_EACH_CHANNEL( chan ) {
1618 pred[chan] = NULL;
1619 }
1620 return;
1621 }
1622
1623 swizzles[0] = inst->Predicate.SwizzleX;
1624 swizzles[1] = inst->Predicate.SwizzleY;
1625 swizzles[2] = inst->Predicate.SwizzleZ;
1626 swizzles[3] = inst->Predicate.SwizzleW;
1627
1628 index = inst->Predicate.Index;
1629 assert(index < LP_MAX_TGSI_PREDS);
1630
1631 TGSI_FOR_EACH_CHANNEL( chan ) {
1632 unsigned swizzle = swizzles[chan];
1633
1634 /*
1635 * Only fetch the predicate register channels that are actually listed
1636 * in the swizzles
1637 */
1638 if (!unswizzled[swizzle]) {
1639 value = LLVMBuildLoad(builder,
1640 bld->preds[index][swizzle], "");
1641
1642 /*
1643 * Convert the value to an integer mask.
1644 *
1645 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
1646 * is needlessly causing two comparisons due to storing the intermediate
1647 * result as float vector instead of an integer mask vector.
1648 */
1649 value = lp_build_compare(bld->bld_base.base.gallivm,
1650 bld->bld_base.base.type,
1651 PIPE_FUNC_NOTEQUAL,
1652 value,
1653 bld->bld_base.base.zero);
1654 if (inst->Predicate.Negate) {
1655 value = LLVMBuildNot(builder, value, "");
1656 }
1657
1658 unswizzled[swizzle] = value;
1659 } else {
1660 value = unswizzled[swizzle];
1661 }
1662
1663 pred[chan] = value;
1664 }
1665 }
1666
1667
1668 /**
1669 * Register store.
1670 */
1671 static void
1672 emit_store_chan(
1673 struct lp_build_tgsi_context *bld_base,
1674 const struct tgsi_full_instruction *inst,
1675 unsigned index,
1676 unsigned chan_index,
1677 LLVMValueRef pred,
1678 LLVMValueRef value)
1679 {
1680 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1681 struct gallivm_state *gallivm = bld_base->base.gallivm;
1682 LLVMBuilderRef builder = gallivm->builder;
1683 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1684 struct lp_build_context *float_bld = &bld_base->base;
1685 struct lp_build_context *int_bld = &bld_base->int_bld;
1686 LLVMValueRef indirect_index = NULL;
1687 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1688
1689 /*
1690 * Apply saturation.
1691 *
1692 * It is always assumed to be float.
1693 */
1694 switch( inst->Instruction.Saturate ) {
1695 case TGSI_SAT_NONE:
1696 break;
1697
1698 case TGSI_SAT_ZERO_ONE:
1699 assert(dtype == TGSI_TYPE_FLOAT ||
1700 dtype == TGSI_TYPE_UNTYPED);
1701 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1702 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1703 break;
1704
1705 case TGSI_SAT_MINUS_PLUS_ONE:
1706 assert(dtype == TGSI_TYPE_FLOAT ||
1707 dtype == TGSI_TYPE_UNTYPED);
1708 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1709 /* This will give -1.0 for NaN which is probably not what we want. */
1710 value = lp_build_max_ext(float_bld, value,
1711 lp_build_const_vec(gallivm, float_bld->type, -1.0),
1712 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1713 value = lp_build_min(float_bld, value, float_bld->one);
1714 break;
1715
1716 default:
1717 assert(0);
1718 }
1719
1720 if (reg->Register.Indirect) {
1721 indirect_index = get_indirect_index(bld,
1722 reg->Register.File,
1723 reg->Register.Index,
1724 &reg->Indirect);
1725 } else {
1726 assert(reg->Register.Index <=
1727 bld_base->info->file_max[reg->Register.File]);
1728 }
1729
1730 if (DEBUG_EXECUTION) {
1731 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1732 }
1733
1734 switch( reg->Register.File ) {
1735 case TGSI_FILE_OUTPUT:
1736 /* Outputs are always stored as floats */
1737 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1738
1739 if (reg->Register.Indirect) {
1740 LLVMValueRef index_vec; /* indexes into the output registers */
1741 LLVMValueRef outputs_array;
1742 LLVMTypeRef fptr_type;
1743
1744 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1745 indirect_index,
1746 chan_index,
1747 TRUE);
1748
1749 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1750 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1751
1752 /* Scatter store values into output registers */
1753 emit_mask_scatter(bld, outputs_array, index_vec, value,
1754 &bld->exec_mask, pred);
1755 }
1756 else {
1757 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1758 chan_index);
1759 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
1760 }
1761 break;
1762
1763 case TGSI_FILE_TEMPORARY:
1764 /* Temporaries are always stored as floats */
1765 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1766
1767 if (reg->Register.Indirect) {
1768 LLVMValueRef index_vec; /* indexes into the temp registers */
1769 LLVMValueRef temps_array;
1770 LLVMTypeRef fptr_type;
1771
1772 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1773 indirect_index,
1774 chan_index,
1775 TRUE);
1776
1777 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1778 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1779
1780 /* Scatter store values into temp registers */
1781 emit_mask_scatter(bld, temps_array, index_vec, value,
1782 &bld->exec_mask, pred);
1783 }
1784 else {
1785 LLVMValueRef temp_ptr;
1786 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1787 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
1788 }
1789 break;
1790
1791 case TGSI_FILE_ADDRESS:
1792 assert(dtype == TGSI_TYPE_SIGNED);
1793 assert(LLVMTypeOf(value) == int_bld->vec_type);
1794 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1795 lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
1796 bld->addr[reg->Register.Index][chan_index]);
1797 break;
1798
1799 case TGSI_FILE_PREDICATE:
1800 assert(LLVMTypeOf(value) == float_bld->vec_type);
1801 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1802 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
1803 bld->preds[reg->Register.Index][chan_index]);
1804 break;
1805
1806 default:
1807 assert( 0 );
1808 }
1809
1810 (void)dtype;
1811 }
1812
1813 /*
1814 * Called at the beginning of the translation of each TGSI instruction, to
1815 * emit some debug code.
1816 */
1817 static void
1818 emit_debug(
1819 struct lp_build_tgsi_context * bld_base,
1820 const struct tgsi_full_instruction * inst,
1821 const struct tgsi_opcode_info * info)
1822
1823 {
1824 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1825
1826 if (DEBUG_EXECUTION) {
1827 /*
1828 * Dump the TGSI instruction.
1829 */
1830
1831 struct gallivm_state *gallivm = bld_base->base.gallivm;
1832 char buf[512];
1833 buf[0] = '$';
1834 buf[1] = ' ';
1835 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1836 lp_build_printf(gallivm, buf);
1837
1838 /* Dump the execution mask.
1839 */
1840 if (bld->exec_mask.has_mask) {
1841 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1842 }
1843 }
1844 }
1845
1846 static void
1847 emit_store(
1848 struct lp_build_tgsi_context * bld_base,
1849 const struct tgsi_full_instruction * inst,
1850 const struct tgsi_opcode_info * info,
1851 LLVMValueRef dst[4])
1852
1853 {
1854 unsigned chan_index;
1855 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1856
1857 if(info->num_dst) {
1858 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1859
1860 emit_fetch_predicate( bld, inst, pred );
1861
1862 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1863 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1864 }
1865 }
1866 }
1867
1868 static unsigned
1869 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1870 {
1871 switch (tgsi_target) {
1872 case TGSI_TEXTURE_BUFFER:
1873 return PIPE_BUFFER;
1874 case TGSI_TEXTURE_1D:
1875 case TGSI_TEXTURE_SHADOW1D:
1876 return PIPE_TEXTURE_1D;
1877 case TGSI_TEXTURE_2D:
1878 case TGSI_TEXTURE_SHADOW2D:
1879 case TGSI_TEXTURE_2D_MSAA:
1880 return PIPE_TEXTURE_2D;
1881 case TGSI_TEXTURE_3D:
1882 return PIPE_TEXTURE_3D;
1883 case TGSI_TEXTURE_CUBE:
1884 case TGSI_TEXTURE_SHADOWCUBE:
1885 return PIPE_TEXTURE_CUBE;
1886 case TGSI_TEXTURE_RECT:
1887 case TGSI_TEXTURE_SHADOWRECT:
1888 return PIPE_TEXTURE_RECT;
1889 case TGSI_TEXTURE_1D_ARRAY:
1890 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1891 return PIPE_TEXTURE_1D_ARRAY;
1892 case TGSI_TEXTURE_2D_ARRAY:
1893 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1894 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1895 return PIPE_TEXTURE_2D_ARRAY;
1896 case TGSI_TEXTURE_CUBE_ARRAY:
1897 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1898 return PIPE_TEXTURE_CUBE_ARRAY;
1899 default:
1900 assert(0);
1901 return PIPE_BUFFER;
1902 }
1903 }
1904
1905
1906 static enum lp_sampler_lod_property
1907 lp_build_lod_property(
1908 struct lp_build_tgsi_context *bld_base,
1909 const struct tgsi_full_instruction *inst,
1910 unsigned src_op)
1911 {
1912 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
1913 enum lp_sampler_lod_property lod_property;
1914
1915 /*
1916 * Not much we can do here. We could try catching inputs declared
1917 * with constant interpolation but not sure it's worth it - since for
1918 * TEX opcodes as well as FETCH/LD the lod comes from same reg as
1919 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
1920 * like the constant/immediate recognition below.
1921 * What seems to be of more value would be to recognize temps holding
1922 * broadcasted scalars but no way we can do it.
1923 * Tried asking llvm but without any success (using LLVMIsConstant
1924 * even though this isn't exactly what we'd need), even as simple as
1925 * IMM[0] UINT32 (0,-1,0,0)
1926 * MOV TEMP[0] IMM[0].yyyy
1927 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
1928 * doesn't work.
1929 * This means there's ZERO chance this will ever catch a scalar lod
1930 * with traditional tex opcodes as well as texel fetches, since the lod
1931 * comes from the same reg as coords (except some test shaders using
1932 * constant coords maybe).
1933 * There's at least hope for sample opcodes as well as size queries.
1934 */
1935 if (reg->Register.File == TGSI_FILE_CONSTANT ||
1936 reg->Register.File == TGSI_FILE_IMMEDIATE) {
1937 lod_property = LP_SAMPLER_LOD_SCALAR;
1938 }
1939 else if (bld_base->info->processor == TGSI_PROCESSOR_FRAGMENT) {
1940 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
1941 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1942 }
1943 else {
1944 lod_property = LP_SAMPLER_LOD_PER_QUAD;
1945 }
1946 }
1947 else {
1948 /* never use scalar (per-quad) lod the results are just too wrong. */
1949 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1950 }
1951 return lod_property;
1952 }
1953
1954
1955 /**
1956 * High-level instruction translators.
1957 */
1958
/**
 * Emit code for an "old-style" TGSI texture sampling opcode
 * (TEX/TXB/TXL/TXD/TXP and the two-operand TEX2/TXB2/TXL2 forms),
 * where texture and sampler share a single unit index.
 *
 * \param bld          the TGSI-to-LLVM translation context
 * \param inst         the TGSI instruction being translated
 * \param modifier     none / lod bias / explicit lod / explicit derivs / projected
 * \param texel        output: the four texel channel values
 * \param sampler_reg  index of the src register holding the sampler unit
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel,
          unsigned sampler_reg)
{
   unsigned unit = inst->Src[sampler_reg].Register.Index;
   LLVMValueRef oow = NULL;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_derivs, num_offsets, i;
   unsigned shadow_coord = 0;
   unsigned layer_coord = 0;
   unsigned sample_key = 0;

   memset(&params, 0, sizeof(params));

   /* Without a sampler generator there's nothing to emit; return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * Per-target setup: number of coordinates/derivatives/offsets, and
    * which coordinate slots carry the array layer and shadow reference.
    */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      /* fallthrough */
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_SHADOW1D:
      shadow_coord = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      layer_coord = 2;
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      shadow_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      shadow_coord = 4; /* shadow coord special different reg */
      break;
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   default:
      /* MSAA targets can't be sampled with these opcodes. */
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
          inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
         /* note that shadow cube array with bias/explicit lod does not exist */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         /* lod/bias rides in the w channel of the coord register */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   /* For TXP, precompute 1/w to project the coordinates. */
   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   /* Fetch (and optionally project) the texture coordinates. */
   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3) {
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      else {
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   }
   /* Shadow coord occupies always 5th slot. */
   if (shadow_coord) {
      sample_key |= LP_SAMPLER_SHADOW;
      if (shadow_coord == 4) {
         /* shadow cube array: reference value lives in the next src reg */
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   }

   /* TXD: explicit per-axis derivatives from src regs 1 (ddx) and 2 (ddy). */
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   /* Pack everything into the sampler parameter struct and emit the call.
    * For these opcodes texture and sampler use the same unit index. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
2163
/**
 * Emit code for a new-style SAMPLE* opcode (SAMPLE/SAMPLE_B/SAMPLE_L/
 * SAMPLE_C/SAMPLE_C_LZ/SAMPLE_D), which uses separate texture (src1) and
 * sampler (src2) indices, and applies the sampler view's swizzle to the
 * result.
 *
 * \param bld       the TGSI-to-LLVM translation context
 * \param inst      the TGSI instruction being translated
 * \param modifier  none / lod bias / explicit lod / lod zero / explicit derivs
 * \param compare   TRUE for shadow-compare variants (SAMPLE_C / SAMPLE_C_LZ)
 * \param texel     output: the four texel channel values
 */
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   unsigned num_offsets, num_derivs, i;
   unsigned layer_coord = 0;
   unsigned sample_key = 0;

   memset(&params, 0, sizeof(params));

   /* Without a sampler generator there's nothing to emit; return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * unlike old-style tex opcodes the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets,
    * however the target information is NOT there and comes from the
    * declared sampler views instead.
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      layer_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* Bias / explicit lod comes from src3.x; SAMPLE_C_LZ forces lod 0. */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      /* XXX might be better to explicitly pass the level zero information */
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }

   /* Fetch the coordinates; unused slots stay undef. */
   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3)
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      else
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   }
   /* Shadow coord occupies always 5th slot. */
   if (compare) {
      sample_key |= LP_SAMPLER_SHADOW;
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
   }

   /* SAMPLE_D: explicit derivatives from src regs 3 (ddx) and 4 (ddy). */
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* Pack everything into the sampler parameter struct and emit the call. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = texture_unit;
   params.sampler_index = sampler_unit;
   params.context_ptr = bld->context_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* Apply the sampler view's result swizzle (from src1) if non-identity. */
   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
2343
/**
 * Emit code for a texel fetch (TXF or SAMPLE_I): unfiltered read with
 * integer coordinates and explicit lod (where the target has mipmaps).
 *
 * \param bld         the TGSI-to-LLVM translation context
 * \param inst        the TGSI instruction being translated
 * \param texel       output: the four texel channel values
 * \param is_samplei  TRUE for SAMPLE_I (target from declared sampler view,
 *                    view swizzle applied); FALSE for TXF (target from the
 *                    instruction itself, no swizzle)
 */
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;
   unsigned sample_key = LP_SAMPLER_FETCH;

   memset(&params, 0, sizeof(params));

   /* Without a sampler generator there's nothing to emit; return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   /* SAMPLE_I gets the target from the declared view, TXF from the inst. */
   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   /* Per-target coordinate count and layer-coordinate slot. */
   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_2D_MSAA:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers and msaa targets ? */
   if (target != TGSI_TEXTURE_BUFFER &&
       target != TGSI_TEXTURE_2D_MSAA &&
       target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   /* XXX: for real msaa support, the w component would be the sample index. */

   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
   for (i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   /* Optional texel offsets, one per dimension. */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* Pack everything into the sampler parameter struct and emit the call.
    * Fetches don't filter, so texture and sampler index coincide. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.derivs = NULL;
   params.lod = explicit_lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* SAMPLE_I also applies the sampler view's swizzle if non-identity. */
   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
2465
/**
 * Emit code for a texture size query (TXQ or SVIEWINFO).
 *
 * \param bld           the TGSI-to-LLVM translation context
 * \param inst          the TGSI instruction being translated
 * \param sizes_out     output: per-channel size values (integer build context)
 * \param is_sviewinfo  TRUE for SVIEWINFO (target from declared sampler view);
 *                      FALSE for TXQ (target from the instruction)
 */
static void
emit_size_query( struct lp_build_tgsi_soa_context *bld,
                 const struct tgsi_full_instruction *inst,
                 LLVMValueRef *sizes_out,
                 boolean is_sviewinfo)
{
   LLVMValueRef explicit_lod;
   enum lp_sampler_lod_property lod_property;
   unsigned has_lod;
   unsigned i;
   unsigned unit = inst->Src[1].Register.Index;
   unsigned target, pipe_target;

   if (is_sviewinfo) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }
   /* Buffers and rect targets have no mip levels, hence no lod argument. */
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      has_lod = 0;
      break;
   default:
      has_lod = 1;
      break;
   }

   /* Without a sampler generator there's nothing to emit; return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++)
         sizes_out[i] = bld->bld_base.int_bld.undef;
      return;
   }

   if (has_lod) {
      /* lod comes from src0.x */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else {
      explicit_lod = NULL;
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }


   pipe_target = tgsi_to_pipe_tex_target(target);

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 bld->bld_base.int_bld.type,
                                 unit, pipe_target,
                                 bld->context_ptr,
                                 TRUE,
                                 lod_property,
                                 explicit_lod,
                                 sizes_out);
}
2525
2526 static boolean
2527 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2528 int pc)
2529 {
2530 int i;
2531
2532 for (i = 0; i < 5; i++) {
2533 unsigned opcode;
2534
2535 if (pc + i >= bld->bld_base.info->num_instructions)
2536 return TRUE;
2537
2538 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2539
2540 if (opcode == TGSI_OPCODE_END)
2541 return TRUE;
2542
2543 if (opcode == TGSI_OPCODE_TEX ||
2544 opcode == TGSI_OPCODE_TXP ||
2545 opcode == TGSI_OPCODE_TXD ||
2546 opcode == TGSI_OPCODE_TXB ||
2547 opcode == TGSI_OPCODE_TXL ||
2548 opcode == TGSI_OPCODE_TXF ||
2549 opcode == TGSI_OPCODE_TXQ ||
2550 opcode == TGSI_OPCODE_TEX2 ||
2551 opcode == TGSI_OPCODE_TXB2 ||
2552 opcode == TGSI_OPCODE_TXL2 ||
2553 opcode == TGSI_OPCODE_SAMPLE ||
2554 opcode == TGSI_OPCODE_SAMPLE_B ||
2555 opcode == TGSI_OPCODE_SAMPLE_C ||
2556 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2557 opcode == TGSI_OPCODE_SAMPLE_D ||
2558 opcode == TGSI_OPCODE_SAMPLE_I ||
2559 opcode == TGSI_OPCODE_SAMPLE_L ||
2560 opcode == TGSI_OPCODE_SVIEWINFO ||
2561 opcode == TGSI_OPCODE_CAL ||
2562 opcode == TGSI_OPCODE_CALLNZ ||
2563 opcode == TGSI_OPCODE_IF ||
2564 opcode == TGSI_OPCODE_UIF ||
2565 opcode == TGSI_OPCODE_BGNLOOP ||
2566 opcode == TGSI_OPCODE_SWITCH)
2567 return FALSE;
2568 }
2569
2570 return TRUE;
2571 }
2572
2573
2574
/**
 * Kill fragment if any of the src register values are negative.
 *
 * Builds a per-channel "keep" mask (term >= 0), ANDs the channel masks
 * together, exempts inactive lanes (outside the exec mask), and folds the
 * result into the shader's live-fragment mask.
 *
 * \param bld   the TGSI-to-LLVM translation context
 * \param inst  the KILL_IF instruction (condition in Src[0])
 * \param pc    program counter of the instruction, used to decide whether
 *              an early-exit mask check is worthwhile
 */
static void
emit_kill_if(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[TGSI_NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < TGSI_NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   }

   /* AND together the per-channel "keep" masks. */
   mask = NULL;
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);

         if(mask)
            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   /* Lanes already inactive in the exec mask must not be killed here. */
   if (bld->exec_mask.has_mask) {
      LLVMValueRef invmask;
      invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
      mask = LLVMBuildOr(builder, mask, invmask, "");
   }

   lp_build_mask_update(bld->mask, mask);
   /* Only emit an early-exit check if enough shader remains for it to pay off. */
   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}
2632
2633
2634 /**
2635 * Unconditional fragment kill.
2636 * The only predication is the execution mask which will apply if
2637 * we're inside a loop or conditional.
2638 */
2639 static void
2640 emit_kill(struct lp_build_tgsi_soa_context *bld,
2641 int pc)
2642 {
2643 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2644 LLVMValueRef mask;
2645
2646 /* For those channels which are "alive", disable fragment shader
2647 * execution.
2648 */
2649 if (bld->exec_mask.has_mask) {
2650 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2651 }
2652 else {
2653 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2654 mask = zero;
2655 }
2656
2657 lp_build_mask_update(bld->mask, mask);
2658
2659 if (!near_end_of_shader(bld, pc))
2660 lp_build_mask_check(bld->mask);
2661 }
2662
2663
/**
 * Emit code which will dump the value of all the temporary registers
 * to stdout.
 *
 * Iterates over the declared registers of \p file (TGSI_FILE_INPUT,
 * TGSI_FILE_CONSTANT, TGSI_FILE_TEMPORARY or TGSI_FILE_OUTPUT), fetches
 * each used channel and emits a runtime print via emit_dump_reg().
 */
static void
emit_dump_file(struct lp_build_tgsi_soa_context *bld,
               unsigned file)
{
   const struct tgsi_shader_info *info = bld->bld_base.info;
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef reg_ptr;
   int index;
   int max_index = info->file_max[file];

   /*
    * Some register files, particularly constants, can be very large,
    * and dumping everything could make this unusably slow.
    */
   max_index = MIN2(max_index, 32);

   for (index = 0; index <= max_index; index++) {
      LLVMValueRef res;
      unsigned mask;
      int chan;

      /* file_mask only covers the first bits-of-unsigned registers. */
      if (index < 8 * sizeof(unsigned) &&
          (info->file_mask[file] & (1 << index)) == 0) {
         /* This was not declared.*/
         continue;
      }

      /* Inputs track per-channel usage; other files dump all channels. */
      if (file == TGSI_FILE_INPUT) {
         mask = info->input_usage_mask[index];
      } else {
         mask = TGSI_WRITEMASK_XYZW;
      }

      for (chan = 0; chan < 4; chan++) {
         if ((mask & (1 << chan)) == 0) {
            /* This channel is not used.*/
            continue;
         }

         if (file == TGSI_FILE_CONSTANT) {
            /* Constants go through the generic fetch callback, so build a
             * synthetic src register with an identity swizzle. */
            struct tgsi_full_src_register reg;
            memset(&reg, 0, sizeof reg);
            reg.Register.File = file;
            reg.Register.Index = index;
            reg.Register.SwizzleX = 0;
            reg.Register.SwizzleY = 1;
            reg.Register.SwizzleZ = 2;
            reg.Register.SwizzleW = 3;

            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_INPUT) {
            res = bld->inputs[index][chan];
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_TEMPORARY) {
            /* Temporaries and outputs live in allocas; load the value. */
            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else if (file == TGSI_FILE_OUTPUT) {
            reg_ptr = lp_get_output_ptr(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else {
            assert(0);
            continue;
         }

         emit_dump_reg(gallivm, file, index, chan, res);
      }
   }
}
2744
2745
2746
/**
 * Handle a TGSI declaration: allocate storage (allocas) for the declared
 * register range, or record per-unit metadata for files that need it
 * (sampler views, constant buffers).
 *
 * \param bld_base  the TGSI translation context
 * \param decl      the declaration token (file + first..last range)
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      /* Indirectly-addressed temps are allocated as one array elsewhere;
       * only plain temps get per-register allocas here. */
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_PREDICATE:
      assert(last < LP_MAX_TGSI_PREDS);
      for (idx = first; idx <= last; ++idx) {
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
                                                 "predicate");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
      {
         /*
          * We could trivially fetch the per-buffer pointer when fetching the
          * constant, relying on llvm to figure out it's always the same pointer
          * anyway. However, doing so results in a huge (more than factor of 10)
          * slowdown in llvm compilation times for some (but not all) shaders
          * (more specifically, the IR optimization spends way more time in
          * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
          */
         unsigned idx2D = decl->Dim.Index2D;
         LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
         assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
         bld->consts[idx2D] =
            lp_build_array_get(gallivm, bld->consts_ptr, index2D);
         bld->consts_sizes[idx2D] =
            lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
      }
      break;

   default:
      /* don't need to declare other vars */
      break;
   }
}
2841
2842
2843 void lp_emit_immediate_soa(
2844 struct lp_build_tgsi_context *bld_base,
2845 const struct tgsi_full_immediate *imm)
2846 {
2847 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2848 struct gallivm_state * gallivm = bld_base->base.gallivm;
2849 LLVMValueRef imms[4];
2850 unsigned i;
2851 const uint size = imm->Immediate.NrTokens - 1;
2852 assert(size <= 4);
2853 switch (imm->Immediate.DataType) {
2854 case TGSI_IMM_FLOAT32:
2855 for( i = 0; i < size; ++i )
2856 imms[i] =
2857 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2858
2859 break;
2860 case TGSI_IMM_UINT32:
2861 for( i = 0; i < size; ++i ) {
2862 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2863 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2864 }
2865
2866 break;
2867 case TGSI_IMM_INT32:
2868 for( i = 0; i < size; ++i ) {
2869 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2870 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2871 }
2872
2873 break;
2874 }
2875 for( i = size; i < 4; ++i )
2876 imms[i] = bld_base->base.undef;
2877
2878 if (bld->use_immediates_array) {
2879 unsigned index = bld->num_immediates;
2880 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2881 LLVMBuilderRef builder = gallivm->builder;
2882
2883 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
2884 for (i = 0; i < 4; ++i ) {
2885 LLVMValueRef lindex = lp_build_const_int32(
2886 bld->bld_base.base.gallivm, index * 4 + i);
2887 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2888 bld->imms_array, &lindex, 1, "");
2889 LLVMBuildStore(builder, imms[i], imm_ptr);
2890 }
2891 } else {
2892 /* simply copy the immediate values into the next immediates[] slot */
2893 unsigned i;
2894 const uint size = imm->Immediate.NrTokens - 1;
2895 assert(size <= 4);
2896 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
2897
2898 for(i = 0; i < 4; ++i )
2899 bld->immediates[bld->num_immediates][i] = imms[i];
2900
2901 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
2902 unsigned index = bld->num_immediates;
2903 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2904 LLVMBuilderRef builder = gallivm->builder;
2905 for (i = 0; i < 4; ++i ) {
2906 LLVMValueRef lindex = lp_build_const_int32(
2907 bld->bld_base.base.gallivm, index * 4 + i);
2908 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2909 bld->imms_array, &lindex, 1, "");
2910 LLVMBuildStore(builder,
2911 bld->immediates[index][i],
2912 imm_ptr);
2913 }
2914 }
2915 }
2916
2917 bld->num_immediates++;
2918 }
2919
2920 static void
2921 ddx_emit(
2922 const struct lp_build_tgsi_action * action,
2923 struct lp_build_tgsi_context * bld_base,
2924 struct lp_build_emit_data * emit_data)
2925 {
2926 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2927
2928 emit_fetch_deriv(bld, emit_data->args[0], NULL,
2929 &emit_data->output[emit_data->chan], NULL);
2930 }
2931
2932 static void
2933 ddy_emit(
2934 const struct lp_build_tgsi_action * action,
2935 struct lp_build_tgsi_context * bld_base,
2936 struct lp_build_emit_data * emit_data)
2937 {
2938 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2939
2940 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
2941 &emit_data->output[emit_data->chan]);
2942 }
2943
2944 static void
2945 kill_emit(
2946 const struct lp_build_tgsi_action * action,
2947 struct lp_build_tgsi_context * bld_base,
2948 struct lp_build_emit_data * emit_data)
2949 {
2950 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2951
2952 emit_kill(bld, bld_base->pc - 1);
2953 }
2954
2955 static void
2956 kill_if_emit(
2957 const struct lp_build_tgsi_action * action,
2958 struct lp_build_tgsi_context * bld_base,
2959 struct lp_build_emit_data * emit_data)
2960 {
2961 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2962
2963 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
2964 }
2965
2966 static void
2967 tex_emit(
2968 const struct lp_build_tgsi_action * action,
2969 struct lp_build_tgsi_context * bld_base,
2970 struct lp_build_emit_data * emit_data)
2971 {
2972 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2973
2974 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2975 emit_data->output, 1);
2976 }
2977
2978 static void
2979 tex2_emit(
2980 const struct lp_build_tgsi_action * action,
2981 struct lp_build_tgsi_context * bld_base,
2982 struct lp_build_emit_data * emit_data)
2983 {
2984 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2985
2986 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2987 emit_data->output, 2);
2988 }
2989
2990 static void
2991 txb_emit(
2992 const struct lp_build_tgsi_action * action,
2993 struct lp_build_tgsi_context * bld_base,
2994 struct lp_build_emit_data * emit_data)
2995 {
2996 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2997
2998 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2999 emit_data->output, 1);
3000 }
3001
3002 static void
3003 txb2_emit(
3004 const struct lp_build_tgsi_action * action,
3005 struct lp_build_tgsi_context * bld_base,
3006 struct lp_build_emit_data * emit_data)
3007 {
3008 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3009
3010 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3011 emit_data->output, 2);
3012 }
3013
3014 static void
3015 txd_emit(
3016 const struct lp_build_tgsi_action * action,
3017 struct lp_build_tgsi_context * bld_base,
3018 struct lp_build_emit_data * emit_data)
3019 {
3020 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3021
3022 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3023 emit_data->output, 3);
3024 }
3025
3026 static void
3027 txl_emit(
3028 const struct lp_build_tgsi_action * action,
3029 struct lp_build_tgsi_context * bld_base,
3030 struct lp_build_emit_data * emit_data)
3031 {
3032 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3033
3034 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3035 emit_data->output, 1);
3036 }
3037
3038 static void
3039 txl2_emit(
3040 const struct lp_build_tgsi_action * action,
3041 struct lp_build_tgsi_context * bld_base,
3042 struct lp_build_emit_data * emit_data)
3043 {
3044 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3045
3046 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3047 emit_data->output, 2);
3048 }
3049
3050 static void
3051 txp_emit(
3052 const struct lp_build_tgsi_action * action,
3053 struct lp_build_tgsi_context * bld_base,
3054 struct lp_build_emit_data * emit_data)
3055 {
3056 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3057
3058 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3059 emit_data->output, 1);
3060 }
3061
3062 static void
3063 txq_emit(
3064 const struct lp_build_tgsi_action * action,
3065 struct lp_build_tgsi_context * bld_base,
3066 struct lp_build_emit_data * emit_data)
3067 {
3068 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3069
3070 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3071 }
3072
3073 static void
3074 txf_emit(
3075 const struct lp_build_tgsi_action * action,
3076 struct lp_build_tgsi_context * bld_base,
3077 struct lp_build_emit_data * emit_data)
3078 {
3079 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3080
3081 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3082 }
3083
3084 static void
3085 sample_i_emit(
3086 const struct lp_build_tgsi_action * action,
3087 struct lp_build_tgsi_context * bld_base,
3088 struct lp_build_emit_data * emit_data)
3089 {
3090 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3091
3092 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3093 }
3094
3095 static void
3096 sample_emit(
3097 const struct lp_build_tgsi_action * action,
3098 struct lp_build_tgsi_context * bld_base,
3099 struct lp_build_emit_data * emit_data)
3100 {
3101 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3102
3103 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3104 FALSE, emit_data->output);
3105 }
3106
3107 static void
3108 sample_b_emit(
3109 const struct lp_build_tgsi_action * action,
3110 struct lp_build_tgsi_context * bld_base,
3111 struct lp_build_emit_data * emit_data)
3112 {
3113 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3114
3115 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3116 FALSE, emit_data->output);
3117 }
3118
3119 static void
3120 sample_c_emit(
3121 const struct lp_build_tgsi_action * action,
3122 struct lp_build_tgsi_context * bld_base,
3123 struct lp_build_emit_data * emit_data)
3124 {
3125 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3126
3127 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3128 TRUE, emit_data->output);
3129 }
3130
3131 static void
3132 sample_c_lz_emit(
3133 const struct lp_build_tgsi_action * action,
3134 struct lp_build_tgsi_context * bld_base,
3135 struct lp_build_emit_data * emit_data)
3136 {
3137 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3138
3139 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3140 TRUE, emit_data->output);
3141 }
3142
3143 static void
3144 sample_d_emit(
3145 const struct lp_build_tgsi_action * action,
3146 struct lp_build_tgsi_context * bld_base,
3147 struct lp_build_emit_data * emit_data)
3148 {
3149 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3150
3151 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3152 FALSE, emit_data->output);
3153 }
3154
3155 static void
3156 sample_l_emit(
3157 const struct lp_build_tgsi_action * action,
3158 struct lp_build_tgsi_context * bld_base,
3159 struct lp_build_emit_data * emit_data)
3160 {
3161 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3162
3163 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3164 FALSE, emit_data->output);
3165 }
3166
3167 static void
3168 sviewinfo_emit(
3169 const struct lp_build_tgsi_action * action,
3170 struct lp_build_tgsi_context * bld_base,
3171 struct lp_build_emit_data * emit_data)
3172 {
3173 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3174
3175 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3176 }
3177
3178 static LLVMValueRef
3179 mask_vec(struct lp_build_tgsi_context *bld_base)
3180 {
3181 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3182 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3183 struct lp_exec_mask *exec_mask = &bld->exec_mask;
3184
3185 if (!exec_mask->has_mask) {
3186 return lp_build_mask_value(bld->mask);
3187 }
3188 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
3189 exec_mask->exec_mask, "");
3190 }
3191
3192 static void
3193 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3194 LLVMValueRef ptr,
3195 LLVMValueRef mask)
3196 {
3197 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3198 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3199
3200 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3201
3202 LLVMBuildStore(builder, current_vec, ptr);
3203 }
3204
3205 static void
3206 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3207 LLVMValueRef ptr,
3208 LLVMValueRef mask)
3209 {
3210 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3211 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3212
3213 current_vec = lp_build_select(&bld_base->uint_bld,
3214 mask,
3215 bld_base->uint_bld.zero,
3216 current_vec);
3217
3218 LLVMBuildStore(builder, current_vec, ptr);
3219 }
3220
3221 static LLVMValueRef
3222 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3223 LLVMValueRef current_mask_vec,
3224 LLVMValueRef total_emitted_vertices_vec)
3225 {
3226 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3227 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3228 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3229 total_emitted_vertices_vec,
3230 bld->max_output_vertices_vec);
3231
3232 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3233 }
3234
/**
 * Handler for TGSI_OPCODE_EMIT: emit one vertex on every active channel
 * that is still below the shader's maximum output vertex count, then
 * bump the per-primitive and total vertex counters on those channels.
 */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      /* Effective execution mask, further restricted to lanes that have
       * not yet emitted max_output_vertices vertices. */
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      /* Collect current output values into bld->outputs before handing
       * them to the GS interface. */
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      /* Per-lane conditional increment of both vertex counters
       * (active mask lanes are -1, see increment_vec_ptr_by_mask). */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
3268
3269
/**
 * Flush the current primitive on the channels selected by \p mask.
 *
 * Called by end_primitive() with the live execution mask, and by
 * emit_epilogue() with the top-level shader mask (where the exec mask is
 * no longer valid) to flush any vertices still in the cache.
 */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* Lanes that have emitted at least one vertex since the last flush. */
      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      /* Count the flushed primitive and reset the per-primitive vertex
       * counter on the flushed lanes. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
3322
3323 static void
3324 end_primitive(
3325 const struct lp_build_tgsi_action * action,
3326 struct lp_build_tgsi_context * bld_base,
3327 struct lp_build_emit_data * emit_data)
3328 {
3329 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3330
3331 if (bld->gs_iface->end_primitive) {
3332 LLVMValueRef mask = mask_vec(bld_base);
3333 end_primitive_masked(bld_base, mask);
3334 }
3335 }
3336
3337 static void
3338 cal_emit(
3339 const struct lp_build_tgsi_action * action,
3340 struct lp_build_tgsi_context * bld_base,
3341 struct lp_build_emit_data * emit_data)
3342 {
3343 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3344
3345 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3346 &bld_base->pc);
3347 }
3348
3349 static void
3350 ret_emit(
3351 const struct lp_build_tgsi_action * action,
3352 struct lp_build_tgsi_context * bld_base,
3353 struct lp_build_emit_data * emit_data)
3354 {
3355 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3356
3357 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3358 }
3359
3360 static void
3361 brk_emit(
3362 const struct lp_build_tgsi_action * action,
3363 struct lp_build_tgsi_context * bld_base,
3364 struct lp_build_emit_data * emit_data)
3365 {
3366 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3367
3368 lp_exec_break(&bld->exec_mask, bld_base);
3369 }
3370
3371 static void
3372 breakc_emit(
3373 const struct lp_build_tgsi_action * action,
3374 struct lp_build_tgsi_context * bld_base,
3375 struct lp_build_emit_data * emit_data)
3376 {
3377 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3378 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3379 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3380 LLVMValueRef unsigned_cond =
3381 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
3382 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3383 unsigned_cond,
3384 uint_bld->zero);
3385
3386 lp_exec_break_condition(&bld->exec_mask, cond);
3387 }
3388
3389 static void
3390 if_emit(
3391 const struct lp_build_tgsi_action * action,
3392 struct lp_build_tgsi_context * bld_base,
3393 struct lp_build_emit_data * emit_data)
3394 {
3395 LLVMValueRef tmp;
3396 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3397
3398 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3399 emit_data->args[0], bld->bld_base.base.zero);
3400 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3401 }
3402
3403 static void
3404 uif_emit(
3405 const struct lp_build_tgsi_action * action,
3406 struct lp_build_tgsi_context * bld_base,
3407 struct lp_build_emit_data * emit_data)
3408 {
3409 LLVMValueRef tmp;
3410 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3411 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3412
3413 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3414 emit_data->args[0], uint_bld->zero);
3415 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3416 }
3417
3418 static void
3419 case_emit(
3420 const struct lp_build_tgsi_action * action,
3421 struct lp_build_tgsi_context * bld_base,
3422 struct lp_build_emit_data * emit_data)
3423 {
3424 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3425
3426 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3427 }
3428
3429 static void
3430 default_emit(
3431 const struct lp_build_tgsi_action * action,
3432 struct lp_build_tgsi_context * bld_base,
3433 struct lp_build_emit_data * emit_data)
3434 {
3435 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3436
3437 lp_exec_default(&bld->exec_mask, bld_base);
3438 }
3439
3440 static void
3441 switch_emit(
3442 const struct lp_build_tgsi_action * action,
3443 struct lp_build_tgsi_context * bld_base,
3444 struct lp_build_emit_data * emit_data)
3445 {
3446 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3447
3448 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3449 }
3450
3451 static void
3452 endswitch_emit(
3453 const struct lp_build_tgsi_action * action,
3454 struct lp_build_tgsi_context * bld_base,
3455 struct lp_build_emit_data * emit_data)
3456 {
3457 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3458
3459 lp_exec_endswitch(&bld->exec_mask, bld_base);
3460 }
3461
3462 static void
3463 bgnloop_emit(
3464 const struct lp_build_tgsi_action * action,
3465 struct lp_build_tgsi_context * bld_base,
3466 struct lp_build_emit_data * emit_data)
3467 {
3468 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3469
3470 lp_exec_bgnloop(&bld->exec_mask);
3471 }
3472
3473 static void
3474 bgnsub_emit(
3475 const struct lp_build_tgsi_action * action,
3476 struct lp_build_tgsi_context * bld_base,
3477 struct lp_build_emit_data * emit_data)
3478 {
3479 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3480
3481 lp_exec_mask_bgnsub(&bld->exec_mask);
3482 }
3483
3484 static void
3485 else_emit(
3486 const struct lp_build_tgsi_action * action,
3487 struct lp_build_tgsi_context * bld_base,
3488 struct lp_build_emit_data * emit_data)
3489 {
3490 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3491
3492 lp_exec_mask_cond_invert(&bld->exec_mask);
3493 }
3494
3495 static void
3496 endif_emit(
3497 const struct lp_build_tgsi_action * action,
3498 struct lp_build_tgsi_context * bld_base,
3499 struct lp_build_emit_data * emit_data)
3500 {
3501 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3502
3503 lp_exec_mask_cond_pop(&bld->exec_mask);
3504 }
3505
3506 static void
3507 endloop_emit(
3508 const struct lp_build_tgsi_action * action,
3509 struct lp_build_tgsi_context * bld_base,
3510 struct lp_build_emit_data * emit_data)
3511 {
3512 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3513
3514 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3515 }
3516
3517 static void
3518 endsub_emit(
3519 const struct lp_build_tgsi_action * action,
3520 struct lp_build_tgsi_context * bld_base,
3521 struct lp_build_emit_data * emit_data)
3522 {
3523 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3524
3525 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3526 }
3527
3528 static void
3529 cont_emit(
3530 const struct lp_build_tgsi_action * action,
3531 struct lp_build_tgsi_context * bld_base,
3532 struct lp_build_emit_data * emit_data)
3533 {
3534 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3535
3536 lp_exec_continue(&bld->exec_mask);
3537 }
3538
/**
 * Per-shader setup, run before the TGSI instructions are translated.
 *
 * Allocates backing alloca arrays for register files accessed with
 * indirect addressing (temporaries, outputs, immediates, inputs), and for
 * geometry shaders allocates and zeroes the vertex/primitive counters.
 */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      /* file_max is the highest register index used, so allocate
       * (max + 1) registers of 4 channels each. */
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                bld_base->base.vec_type, array_size,
                                                "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      /* Store each provided input channel into its array slot; a NULL
       * entry means the channel was never set by the caller. */
      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      /* Counters used while generating geometry: primitives emitted,
       * vertices in the current (unflushed) primitive, and total
       * vertices emitted.  All start at zero. */
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}
3629
/**
 * Per-shader teardown, run after all TGSI instructions were translated.
 *
 * For geometry shaders: flush any pending primitive and hand the final
 * vertex/primitive counts to the GS interface.  For other stages: copy
 * indirectly-addressed outputs back into the caller-provided slots.
 */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the caller */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* implicit end_primitives, needed in case there are any unflushed
         vertices in the cache. Note must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      gather_outputs(bld);
   }
}
3667
3668 void
3669 lp_build_tgsi_soa(struct gallivm_state *gallivm,
3670 const struct tgsi_token *tokens,
3671 struct lp_type type,
3672 struct lp_build_mask_context *mask,
3673 LLVMValueRef consts_ptr,
3674 LLVMValueRef const_sizes_ptr,
3675 const struct lp_bld_tgsi_system_values *system_values,
3676 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
3677 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
3678 LLVMValueRef context_ptr,
3679 struct lp_build_sampler_soa *sampler,
3680 const struct tgsi_shader_info *info,
3681 const struct lp_build_tgsi_gs_iface *gs_iface)
3682 {
3683 struct lp_build_tgsi_soa_context bld;
3684
3685 struct lp_type res_type;
3686
3687 assert(type.length <= LP_MAX_VECTOR_LENGTH);
3688 memset(&res_type, 0, sizeof res_type);
3689 res_type.width = type.width;
3690 res_type.length = type.length;
3691 res_type.sign = 1;
3692
3693 /* Setup build context */
3694 memset(&bld, 0, sizeof bld);
3695 lp_build_context_init(&bld.bld_base.base, gallivm, type);
3696 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
3697 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
3698 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
3699 bld.mask = mask;
3700 bld.inputs = inputs;
3701 bld.outputs = outputs;
3702 bld.consts_ptr = consts_ptr;
3703 bld.const_sizes_ptr = const_sizes_ptr;
3704 bld.sampler = sampler;
3705 bld.bld_base.info = info;
3706 bld.indirect_files = info->indirect_files;
3707 bld.context_ptr = context_ptr;
3708
3709 /*
3710 * If the number of temporaries is rather large then we just
3711 * allocate them as an array right from the start and treat
3712 * like indirect temporaries.
3713 */
3714 if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
3715 bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
3716 }
3717 /*
3718 * For performance reason immediates are always backed in a static
3719 * array, but if their number is too great, we have to use just
3720 * a dynamically allocated array.
3721 */
3722 bld.use_immediates_array =
3723 (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
3724 if (bld.use_immediates_array) {
3725 bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
3726 }
3727
3728
3729 bld.bld_base.soa = TRUE;
3730 bld.bld_base.emit_debug = emit_debug;
3731 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
3732 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
3733 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
3734 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
3735 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
3736 bld.bld_base.emit_store = emit_store;
3737
3738 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
3739 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
3740
3741 bld.bld_base.emit_prologue = emit_prologue;
3742 bld.bld_base.emit_epilogue = emit_epilogue;
3743
3744 /* Set opcode actions */
3745 lp_set_default_actions_cpu(&bld.bld_base);
3746
3747 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
3748 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
3749 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
3750 bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
3751 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
3752 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
3753 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
3754 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
3755 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
3756 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
3757 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
3758 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
3759 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
3760 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
3761 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
3762 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
3763 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
3764 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
3765 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
3766 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
3767 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
3768 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
3769 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
3770 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
3771 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
3772 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
3773 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
3774 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
3775 bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
3776 bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
3777 bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
3778 /* DX10 sampling ops */
3779 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
3780 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
3781 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
3782 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
3783 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
3784 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
3785 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
3786 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
3787
3788 if (gs_iface) {
3789 /* There's no specific value for this because it should always
3790 * be set, but apps using ext_geometry_shader4 quite often
3791 * were forgetting so we're using MAX_VERTEX_VARYING from
3792 * that spec even though we could debug_assert if it's not
3793 * set, but that's a lot uglier. */
3794 uint max_output_vertices;
3795
3796 /* inputs are always indirect with gs */
3797 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
3798 bld.gs_iface = gs_iface;
3799 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
3800 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
3801 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
3802
3803 max_output_vertices =
3804 info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
3805 if (!max_output_vertices)
3806 max_output_vertices = 32;
3807
3808 bld.max_output_vertices_vec =
3809 lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
3810 max_output_vertices);
3811 }
3812
3813 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3814
3815 bld.system_values = *system_values;
3816
3817 lp_build_tgsi_llvm(&bld.bld_base, tokens);
3818
3819 if (0) {
3820 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
3821 LLVMValueRef function = LLVMGetBasicBlockParent(block);
3822 debug_printf("11111111111111111111111111111 \n");
3823 tgsi_dump(tokens, 0);
3824 lp_debug_dump_value(function);
3825 debug_printf("2222222222222222222222222222 \n");
3826 }
3827
3828 if (0) {
3829 LLVMModuleRef module = LLVMGetGlobalParent(
3830 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
3831 LLVMDumpModule(module);
3832
3833 }
3834 lp_exec_mask_fini(&bld.exec_mask);
3835 }