/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_misc.h"
61 #include "lp_bld_swizzle.h"
62 #include "lp_bld_flow.h"
63 #include "lp_bld_coro.h"
64 #include "lp_bld_quad.h"
65 #include "lp_bld_tgsi.h"
66 #include "lp_bld_limits.h"
67 #include "lp_bld_debug.h"
68 #include "lp_bld_printf.h"
69 #include "lp_bld_sample.h"
70 #include "lp_bld_struct.h"
71
72 /* SM 4.0 says that subroutines can nest 32 deep and
73 * we need one more for our main function */
74 #define LP_MAX_NUM_FUNCS 33
75
76 #define DUMP_GS_EMITS 0
77
/*
 * If non-zero, the generated LLVM IR will print intermediate results on
 * every TGSI instruction.
 *
 * TODO:
 * - take execution masks into consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0


/*
 * Emit code to print a register value.
 */
static void
emit_dump_reg(struct gallivm_state *gallivm,
              unsigned file,
              unsigned index,
              unsigned chan,
              LLVMValueRef value)
{
   char buf[32];

   snprintf(buf, sizeof buf, " %s[%u].%c = ",
            tgsi_file_name(file),
            index, "xyzw"[chan]);

   lp_build_print_value(gallivm, buf, value);
}
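
/*
 * For illustration: emit_dump_reg(gallivm, TGSI_FILE_TEMPORARY, 0, 0, value)
 * emits IR that, at shader run time, prints "TEMP[0].x = " followed by the
 * per-lane contents of 'value'.
 */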

/*
 * Return the context for the current function.
 * (always 'main', if the shader doesn't make any function calls)
 */
static inline struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
   assert(mask->function_stack_size > 0);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
   return &mask->function_stack[mask->function_stack_size - 1];
}

/*
 * Returns true if we're in a loop.
 * This is global: it returns true even if there is no loop inside the
 * current function, as long as we were inside a loop in another function
 * from which this one was called.
 */
static inline boolean
mask_has_loop(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->loop_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Combine the execution mask, if there is one, with the current mask.
 */
static LLVMValueRef
mask_vec(struct lp_build_tgsi_context *bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_exec_mask *exec_mask = &bld->exec_mask;
   LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
   if (!exec_mask->has_mask) {
      return bld_mask;
   }
   if (!bld_mask)
      return exec_mask->exec_mask;
   return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
                       exec_mask->exec_mask, "");
}

/*
 * Returns true if we're inside a switch statement.
 * This is global: it returns true even if there is no switch in the
 * current function, as long as we were inside a switch in another function
 * from which this one was called.
 */
static inline boolean
mask_has_switch(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->switch_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Returns true if we're inside a conditional.
 * This is global: it returns true even if there is no conditional in the
 * current function, as long as we were inside a conditional in another
 * function from which this one was called.
 */
static inline boolean
mask_has_cond(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->cond_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}


/*
 * Initialize a function context at the specified index.
 */
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->switch_stack_size = 0;

   if (function_idx == 0) {
      ctx->ret_mask = mask->ret_mask;
   }

   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}

static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}

static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}

static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask = mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   boolean has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /* For loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}
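
/*
 * Illustration of the combination above: with a 4-wide vector, a lane stays
 * live only if every partial mask has it set, e.g.
 *
 *    cond_mask  = { ~0, ~0,  0, ~0 }   (lane 2 failed an IF)
 *    cont_mask  = { ~0,  0, ~0, ~0 }   (lane 1 hit a CONT)
 *    break_mask = { ~0, ~0, ~0,  0 }   (lane 3 hit a BRK)
 *    exec_mask  = { ~0,  0,  0,  0 }
 *
 * (switch_mask and ret_mask are ANDed in the same way when present.)
 */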

static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}

static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}

static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);

      if (ctx->switch_in_default) {
         /*
          * Stop default execution, but only if this is an unconditional
          * break. (The condition here is not perfect since dead code after
          * break is allowed, but it should be sufficient since false
          * negatives are merely unoptimized - so we don't have to
          * pre-evaluate that.)
          */
         if (break_always && ctx->switch_pc) {
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}

static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);

   assert(ctx->loop_stack_size);
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
                                           ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
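
/*
 * Sketch of the control flow the two functions above generate for a TGSI
 * loop (pseudo-IR, for illustration):
 *
 *    store break_var = break_mask
 *    br bgnloop
 *  bgnloop:
 *    break_mask = load break_var
 *    ... loop body, all stores masked by exec_mask ...
 *    store break_var = break_mask
 *    limiter = limiter - 1
 *    br (exec_mask != 0 && limiter > 0) ? bgnloop : endloop
 *  endloop:
 *
 * i.e. the loop keeps iterating while any lane is still active, with the
 * limiter guarding against infinite loops.
 */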

static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}

static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* Check if there's a deferred default; if so, execute it now. */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * Re-purpose switch_pc to point here again, since we stop execution of
       * the deferred default after the next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }
   else if (ctx->switch_pc && ctx->switch_in_default) {
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}

static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
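
/*
 * Example, for illustration: given "SWITCH s ... CASE 4", lanes where
 * s == 4 are ORed into the running switch_mask (so fallthrough from a
 * previous case keeps its lanes active), and the same lanes accumulate
 * into switch_mask_default, which at DEFAULT time tells us which lanes
 * matched no case at all.
 */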

/*
 * Analyse the default statement in a switch.
 * \return true if default is the last statement, false otherwise
 * \param default_pc_start contains the pc of the instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   int curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   while (pc != ~0u && pc < bld_base->num_instructions) {
      enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      default:
         ; /* nothing */
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}

static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not always be at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is the last statement in the switch (note that case statements
    * appearing "at the same time" as default don't change that), everything
    * is just fine: update the switch mask and go on. This means we can
    * handle default with fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, a "case" immediately before default isn't really a
       * fallthrough, however we still have to count it as such, as we
       * have already updated the masks.
       * If that happens in practice, we could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not the last statement and there was no fallthrough into it,
       * we record the PC and continue execution at the next case (again,
       * cases encountered at the same time don't count). At endswitch time,
       * we update the switch mask and go back, executing the code we skipped
       * until the next break (possibly re-executing some code with a changed
       * mask if there was a fallthrough out of default).
       * Finally, if it is not the last statement and there was a fallthrough
       * into it, do the same as in the former case, except instead of
       * skipping the code just execute it without updating the mask, then go
       * back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
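
/*
 * Example of the tricky case handled above (for illustration):
 *
 *    SWITCH s
 *    CASE 0
 *      ...
 *      BRK
 *    DEFAULT      <- not last, previous opcode is BRK (no fallthrough in):
 *      ...           remember this pc, skip ahead to CASE 1
 *      BRK
 *    CASE 1
 *      ...
 *    ENDSWITCH    <- lanes that matched no case get the default mask; jump
 *                    back, re-run the DEFAULT body, and its unconditional
 *                    BRK returns execution here
 */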


/*
 * Store val into the address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val) ||
          LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == LLVMArrayTypeKind);

   if (exec_mask) {
      LLVMValueRef res, dst;

      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, exec_mask, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      LLVMBuildStore(builder, val, dst_ptr);
}
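
/*
 * E.g. with exec_mask = { ~0, 0, ~0, 0 } this emits
 *
 *    dst = load dst_ptr
 *    res = select(exec_mask, val, dst)   ; lanes 0,2 take val, 1,3 keep dst
 *    store res, dst_ptr
 *
 * so inactive lanes keep their previous register contents.
 */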

static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}

static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in an if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}


static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             int index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
         gep[1] = lindex;
         return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
      } else {
         return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
      }
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index which temporary register
 * \param chan which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}

/**
 * Return pointer to an output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index which output register
 * \param chan which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}

/*
 * If we have indirect addressing in outputs, copy our alloca array
 * to the output slots specified by the caller, to make sure
 * our outputs are delivered consistently via the same interface.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
      unsigned index, chan;
      assert(bld->bld_base.info->num_outputs <=
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}

/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      di = lp_build_const_int32(bld->gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(bld->gallivm, i >> 1);
      else
         si = di;

      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
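
/*
 * Note how the indexes2 case interleaves the two index vectors: result
 * element 2*i comes from indexes[i] and element 2*i+1 from indexes2[i],
 * so for a 4-wide type the 8 loaded floats form 4 adjacent pairs that the
 * caller can bitcast to 4 64-bit values.
 */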


/**
 * Scatter/store vector.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg,
                   int index_limit)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      assert(index_limit >= 0);
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type, index_limit);

      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}
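
/*
 * E.g. for "MOV TEMP[0], TEMP[3 + ADDR[0].x]" this computes, per lane,
 * min(3 + ADDR[0].x, index_limit) with index_limit being the file's max
 * declared index - clamping rather than branching, so out-of-bounds lanes
 * still read some valid (if wrong) location.
 */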

static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,
               enum tgsi_opcode_type stype)
{
   struct lp_build_context *bld_fetch;

   switch (stype) {
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_UNTYPED:
      bld_fetch = &bld_base->base;
      break;
   case TGSI_TYPE_UNSIGNED:
      bld_fetch = &bld_base->uint_bld;
      break;
   case TGSI_TYPE_SIGNED:
      bld_fetch = &bld_base->int_bld;
      break;
   case TGSI_TYPE_DOUBLE:
      bld_fetch = &bld_base->dbl_bld;
      break;
   case TGSI_TYPE_UNSIGNED64:
      bld_fetch = &bld_base->uint64_bld;
      break;
   case TGSI_TYPE_SIGNED64:
      bld_fetch = &bld_base->int64_bld;
      break;
   case TGSI_TYPE_VOID:
   default:
      assert(0);
      bld_fetch = NULL;
      break;
   }
   return bld_fetch;
}

static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      unsigned chan_index,
                      boolean need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      unsigned i;
      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}
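
/*
 * Worked example: with a 4-wide vector, indirect_index = {1,1,1,1} and
 * chan_index = 2, this yields (1*4 + 2) * 4 + {0,1,2,3} = {24,25,26,27},
 * i.e. the four per-lane floats of channel z of register 1 in the SoA
 * array layout.
 */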

static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec; /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      LLVMValueRef index; /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");

      if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
         LLVMValueRef scalar2, scalar2_ptr;
         LLVMValueRef shuffles[2];
         index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));

         scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
                                    &index, 1, "");

         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
         shuffles[0] = lp_build_const_int32(gallivm, 0);
         shuffles[1] = lp_build_const_int32(gallivm, 1);

         res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
         res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
         res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
      } else {
         if (stype == TGSI_TYPE_DOUBLE) {
            LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
            bld_broad = &bld_base->dbl_bld;
         } else if (stype == TGSI_TYPE_UNSIGNED64) {
            LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
            bld_broad = &bld_base->uint64_bld;
         } else if (stype == TGSI_TYPE_SIGNED64) {
            LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
            bld_broad = &bld_base->int64_bld;
         }
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         res = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

/**
 * Fetch 64-bit values from two separate channels.
 * 64-bit values are stored split across two channels, like xy and zw.
 * This function creates a set of vec_length*2 floats,
 * extracts the values from the two channels,
 * puts them in the correct place, then casts to vec_length 64-bit values.
 */
static LLVMValueRef
emit_fetch_64bit(
   struct lp_build_tgsi_context * bld_base,
   enum tgsi_opcode_type stype,
   LLVMValueRef input,
   LLVMValueRef input2)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
   }
   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");

   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
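
/*
 * E.g. for a 4-wide type the shuffle mask built above is
 * {0, 4, 1, 5, 2, 6, 3, 7}: element i of 'input' is paired with element i
 * of 'input2', giving d0.x, d0.y, d1.x, d1.y, ... before the final bitcast
 * to 4 64-bit values.
 */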

static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;
   unsigned swizzle = swizzle_in & 0xffff;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec; /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect,
                                             bld->bld_base.info->file_max[reg->Register.File]);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle_in >> 16,
                                               FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(gallivm, 0);
         gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, gep, 2, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;
            gep[1] = lp_build_const_int32(gallivm,
                                          reg->Register.Index * 4 + (swizzle_in >> 16));
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, gep, 2, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}

static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec; /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + (swizzle_in >> 16));
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}


static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value, not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      /*
       * XXX: this is possibly not quite the right value, since file_max may be
       * larger than the max attrib index, due to it being the max of declared
       * inputs AND the max vertices per prim (which is 6 for tri adj).
       * It should however be safe to use (since we always allocate
       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
       */
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      /*
       * A fixed 6 should do as well (which is what we allocate).
       */
      int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        index_limit);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

1681 static LLVMValueRef
1682 emit_fetch_system_value(
1683 struct lp_build_tgsi_context * bld_base,
1684 const struct tgsi_full_src_register * reg,
1685 enum tgsi_opcode_type stype,
1686 unsigned swizzle_in)
1687 {
1688 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1689 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1690 const struct tgsi_shader_info *info = bld->bld_base.info;
1691 LLVMBuilderRef builder = gallivm->builder;
1692 LLVMValueRef res;
1693    enum tgsi_opcode_type atype; /* actual type of the value */
1694 unsigned swizzle = swizzle_in & 0xffff;
1695
1696 assert(!reg->Register.Indirect);
1697
1698 switch (info->system_value_semantic_name[reg->Register.Index]) {
1699 case TGSI_SEMANTIC_INSTANCEID:
1700 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1701 atype = TGSI_TYPE_UNSIGNED;
1702 break;
1703
1704 case TGSI_SEMANTIC_VERTEXID:
1705 res = bld->system_values.vertex_id;
1706 atype = TGSI_TYPE_UNSIGNED;
1707 break;
1708
1709 case TGSI_SEMANTIC_VERTEXID_NOBASE:
1710 res = bld->system_values.vertex_id_nobase;
1711 atype = TGSI_TYPE_UNSIGNED;
1712 break;
1713
1714 case TGSI_SEMANTIC_BASEVERTEX:
1715 res = bld->system_values.basevertex;
1716 atype = TGSI_TYPE_UNSIGNED;
1717 break;
1718
1719 case TGSI_SEMANTIC_PRIMID:
1720 res = bld->system_values.prim_id;
1721 atype = TGSI_TYPE_UNSIGNED;
1722 break;
1723
1724 case TGSI_SEMANTIC_INVOCATIONID:
1725 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1726 atype = TGSI_TYPE_UNSIGNED;
1727 break;
1728
1729 case TGSI_SEMANTIC_HELPER_INVOCATION:
1730 res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
1731 atype = TGSI_TYPE_UNSIGNED;
1732 break;
1733
1734 case TGSI_SEMANTIC_THREAD_ID:
1735 res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
1736 atype = TGSI_TYPE_UNSIGNED;
1737 break;
1738
1739 case TGSI_SEMANTIC_BLOCK_ID:
1740 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
1741 atype = TGSI_TYPE_UNSIGNED;
1742 break;
1743
1744 case TGSI_SEMANTIC_GRID_SIZE:
1745 res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
1746 atype = TGSI_TYPE_UNSIGNED;
1747 break;
1748
1749 default:
1750 assert(!"unexpected semantic in emit_fetch_system_value");
1751 res = bld_base->base.zero;
1752 atype = TGSI_TYPE_FLOAT;
1753 break;
1754 }
1755
1756 if (atype != stype) {
1757 if (stype == TGSI_TYPE_FLOAT) {
1758 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1759 } else if (stype == TGSI_TYPE_UNSIGNED) {
1760 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1761 } else if (stype == TGSI_TYPE_SIGNED) {
1762 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1763 }
1764 }
1765
1766 return res;
1767 }
1768
1769 /**
1770 * Register fetch with derivatives.
1771 */
1772 static void
1773 emit_fetch_deriv(
1774 struct lp_build_tgsi_soa_context *bld,
1775 LLVMValueRef src,
1776 LLVMValueRef *res,
1777 LLVMValueRef *ddx,
1778 LLVMValueRef *ddy)
1779 {
1780 if (res)
1781 *res = src;
1782
1783 /* TODO: use interpolation coeffs for inputs */
1784
1785 if (ddx)
1786 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1787
1788 if (ddy)
1789 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1790 }
1791
1792 /**
1793  * Store an array of vec_length 64-bit values into two arrays of
1794  * vec_length floats, i.e.
1795  * value is d0, d1, d2, d3 etc.
1796  * Each 64-bit value has low and high pieces x, y,
1797  * so they get stored into the separate channels as:
1798  * chan_ptr  = d0.x, d1.x, d2.x, d3.x
1799  * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1800  */
1801 static void
1802 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1803 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1804 LLVMValueRef value)
1805 {
1806 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1807 struct gallivm_state *gallivm = bld_base->base.gallivm;
1808 LLVMBuilderRef builder = gallivm->builder;
1809 struct lp_build_context *float_bld = &bld_base->base;
1810 unsigned i;
1811 LLVMValueRef temp, temp2;
1812 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1813 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1814
1815 for (i = 0; i < bld_base->base.type.length; i++) {
1816 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1817 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1818 }
1819
1820 temp = LLVMBuildShuffleVector(builder, value,
1821 LLVMGetUndef(LLVMTypeOf(value)),
1822 LLVMConstVector(shuffles,
1823 bld_base->base.type.length),
1824 "");
1825 temp2 = LLVMBuildShuffleVector(builder, value,
1826 LLVMGetUndef(LLVMTypeOf(value)),
1827 LLVMConstVector(shuffles2,
1828 bld_base->base.type.length),
1829 "");
1830
1831 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1832 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1833 }
1834
1835 /**
1836 * Register store.
1837 */
1838 static void
1839 emit_store_chan(
1840 struct lp_build_tgsi_context *bld_base,
1841 const struct tgsi_full_instruction *inst,
1842 unsigned index,
1843 unsigned chan_index,
1844 LLVMValueRef value)
1845 {
1846 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1847 struct gallivm_state *gallivm = bld_base->base.gallivm;
1848 LLVMBuilderRef builder = gallivm->builder;
1849 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1850 struct lp_build_context *float_bld = &bld_base->base;
1851 struct lp_build_context *int_bld = &bld_base->int_bld;
1852 LLVMValueRef indirect_index = NULL;
1853 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1854
1855 /*
1856 * Apply saturation.
1857 *
1858     * The value to saturate is always assumed to be a float.
1859 */
1860 if (inst->Instruction.Saturate) {
1861 assert(dtype == TGSI_TYPE_FLOAT ||
1862 dtype == TGSI_TYPE_UNTYPED);
1863 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1864 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1865 }
1866
1867 if (reg->Register.Indirect) {
1868 /*
1869        * Currently mesa/st doesn't generate indirect stores
1870        * to 64-bit values; it normally uses MOV to do indirect stores.
1871 */
1872 assert(!tgsi_type_is_64bit(dtype));
1873 indirect_index = get_indirect_index(bld,
1874 reg->Register.File,
1875 reg->Register.Index,
1876 &reg->Indirect,
1877 bld->bld_base.info->file_max[reg->Register.File]);
1878 } else {
1879 assert(reg->Register.Index <=
1880 bld_base->info->file_max[reg->Register.File]);
1881 }
1882
1883 if (DEBUG_EXECUTION) {
1884 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1885 }
1886
1887 switch( reg->Register.File ) {
1888 case TGSI_FILE_OUTPUT:
1889 /* Outputs are always stored as floats */
1890 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1891
1892 if (reg->Register.Indirect) {
1893 LLVMValueRef index_vec; /* indexes into the output registers */
1894 LLVMValueRef outputs_array;
1895 LLVMTypeRef fptr_type;
1896
1897 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1898 indirect_index,
1899 chan_index,
1900 TRUE);
1901
1902 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1903 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1904
1905 /* Scatter store values into output registers */
1906 emit_mask_scatter(bld, outputs_array, index_vec, value,
1907 &bld->exec_mask);
1908 }
1909 else {
1910 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1911 chan_index);
1912
1913 if (tgsi_type_is_64bit(dtype)) {
1914 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1915 chan_index + 1);
1916 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1917 value);
1918 } else
1919 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1920 }
1921 break;
1922
1923 case TGSI_FILE_TEMPORARY:
1924 /* Temporaries are always stored as floats */
1925 if (!tgsi_type_is_64bit(dtype))
1926 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1927 else
1928 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1929
1930 if (reg->Register.Indirect) {
1931 LLVMValueRef index_vec; /* indexes into the temp registers */
1932 LLVMValueRef temps_array;
1933 LLVMTypeRef fptr_type;
1934
1935 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1936 indirect_index,
1937 chan_index,
1938 TRUE);
1939
1940 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1941 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1942
1943 /* Scatter store values into temp registers */
1944 emit_mask_scatter(bld, temps_array, index_vec, value,
1945 &bld->exec_mask);
1946 }
1947 else {
1948 LLVMValueRef temp_ptr;
1949 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1950
1951 if (tgsi_type_is_64bit(dtype)) {
1952 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1953 reg->Register.Index,
1954 chan_index + 1);
1955 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1956 value);
1957 }
1958 else
1959 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1960 }
1961 break;
1962
1963 case TGSI_FILE_ADDRESS:
1964 assert(dtype == TGSI_TYPE_SIGNED);
1965 assert(LLVMTypeOf(value) == int_bld->vec_type);
1966 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1967 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1968 bld->addr[reg->Register.Index][chan_index]);
1969 break;
1970
1971 default:
1972 assert( 0 );
1973 }
1974
1975 (void)dtype;
1976 }
1977
1978 /*
1979 * Called at the beginning of the translation of each TGSI instruction, to
1980 * emit some debug code.
1981 */
1982 static void
1983 emit_debug(
1984 struct lp_build_tgsi_context * bld_base,
1985 const struct tgsi_full_instruction * inst,
1986 const struct tgsi_opcode_info * info)
1987
1988 {
1989 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1990
1991 if (DEBUG_EXECUTION) {
1992 /*
1993 * Dump the TGSI instruction.
1994 */
1995
1996 struct gallivm_state *gallivm = bld_base->base.gallivm;
1997 char buf[512];
1998 buf[0] = '$';
1999 buf[1] = ' ';
2000 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
2001 lp_build_printf(gallivm, buf);
2002
2003 /* Dump the execution mask.
2004 */
2005 if (bld->exec_mask.has_mask) {
2006 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
2007 }
2008 }
2009 }
2010
2011 static void
2012 emit_store(
2013 struct lp_build_tgsi_context * bld_base,
2014 const struct tgsi_full_instruction * inst,
2015 const struct tgsi_opcode_info * info,
2016 unsigned index,
2017 LLVMValueRef dst[4])
2018
2019 {
2020 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
2021
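   /*
    * For 64-bit types one value covers two adjacent channels, so only
    * the even channels (x and z) carry an actual store.
    */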
2022 unsigned writemask = inst->Dst[index].Register.WriteMask;
2023 while (writemask) {
2024 unsigned chan_index = u_bit_scan(&writemask);
2025 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
2026 continue;
2027 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
2028 }
2029 }
2030
2031 static unsigned
2032 tgsi_to_pipe_tex_target(unsigned tgsi_target)
2033 {
2034 switch (tgsi_target) {
2035 case TGSI_TEXTURE_BUFFER:
2036 return PIPE_BUFFER;
2037 case TGSI_TEXTURE_1D:
2038 case TGSI_TEXTURE_SHADOW1D:
2039 return PIPE_TEXTURE_1D;
2040 case TGSI_TEXTURE_2D:
2041 case TGSI_TEXTURE_SHADOW2D:
2042 case TGSI_TEXTURE_2D_MSAA:
2043 return PIPE_TEXTURE_2D;
2044 case TGSI_TEXTURE_3D:
2045 return PIPE_TEXTURE_3D;
2046 case TGSI_TEXTURE_CUBE:
2047 case TGSI_TEXTURE_SHADOWCUBE:
2048 return PIPE_TEXTURE_CUBE;
2049 case TGSI_TEXTURE_RECT:
2050 case TGSI_TEXTURE_SHADOWRECT:
2051 return PIPE_TEXTURE_RECT;
2052 case TGSI_TEXTURE_1D_ARRAY:
2053 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2054 return PIPE_TEXTURE_1D_ARRAY;
2055 case TGSI_TEXTURE_2D_ARRAY:
2056 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2057 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2058 return PIPE_TEXTURE_2D_ARRAY;
2059 case TGSI_TEXTURE_CUBE_ARRAY:
2060 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2061 return PIPE_TEXTURE_CUBE_ARRAY;
2062 default:
2063 assert(0);
2064 return PIPE_BUFFER;
2065 }
2066 }
2067
2068
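/**
 * Classify how the lod argument of a texture instruction varies across
 * the SoA vector: one scalar for all elements, per-quad, or fully
 * per-element.  The sampler code can then pick the cheapest mip level
 * computation that is still correct for that variation.
 */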
2069 static enum lp_sampler_lod_property
2070 lp_build_lod_property(
2071 struct lp_build_tgsi_context *bld_base,
2072 const struct tgsi_full_instruction *inst,
2073 unsigned src_op)
2074 {
2075 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2076 enum lp_sampler_lod_property lod_property;
2077
2078    /*
2079     * Not much we can do here. We could try catching inputs declared
2080     * with constant interpolation, but it's probably not worth it: for
2081     * TEX opcodes as well as FETCH/LD the lod comes from the same reg
2082     * as the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO
2083     * (just like the constant/immediate recognition below).
2084     * What would be of more value is to recognize temps holding
2085     * broadcasted scalars, but there's no way we can do that.
2086     * We tried asking llvm (using LLVMIsConstant, even though that
2087     * isn't exactly what we'd need) without any success; even as simple as
2088     * IMM[0] UINT32 (0,-1,0,0)
2089     * MOV TEMP[0] IMM[0].yyyy
2090     * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2091     * doesn't work.
2092     * This means there's ZERO chance this will ever catch a scalar lod
2093     * with traditional tex opcodes or texel fetches, since there the lod
2094     * comes from the same reg as the coords (except maybe some test
2095     * shaders using constant coords).
2096     * There's at least hope for sample opcodes as well as size queries.
2097     */
2098 if (reg->Register.File == TGSI_FILE_CONSTANT ||
2099 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2100 lod_property = LP_SAMPLER_LOD_SCALAR;
2101 }
2102 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2103 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2104 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2105 }
2106 else {
2107 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2108 }
2109 }
2110 else {
2111       /* Never use scalar (per-quad) lod; the results are just too wrong. */
2112 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2113 }
2114 return lod_property;
2115 }
2116
2117
2118 /**
2119 * High-level instruction translators.
2120 */
2121
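/**
 * Translate the classic TEX-style opcodes (TEX/TXB/TXL/TXD/TXP/TG4 etc.):
 * build a sample_key plus coords/offsets/lod/derivs and hand them to the
 * sampler code generator.  sampler_reg is the src register holding the
 * sampler unit, which doubles as the texture unit for these opcodes.
 */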
2122 static void
2123 emit_tex( struct lp_build_tgsi_soa_context *bld,
2124 const struct tgsi_full_instruction *inst,
2125 enum lp_build_tex_modifier modifier,
2126 LLVMValueRef *texel,
2127 unsigned sampler_reg,
2128 enum lp_sampler_op_type sampler_op)
2129 {
2130 unsigned unit = inst->Src[sampler_reg].Register.Index;
2131 LLVMValueRef oow = NULL;
2132 LLVMValueRef lod = NULL;
2133 LLVMValueRef coords[5];
2134 LLVMValueRef offsets[3] = { NULL };
2135 struct lp_derivatives derivs;
2136 struct lp_sampler_params params;
2137 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2138 unsigned num_derivs, num_offsets, i;
2139 unsigned shadow_coord = 0;
2140 unsigned layer_coord = 0;
2141 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2142
2143 memset(&params, 0, sizeof(params));
2144
2145 if (!bld->sampler) {
2146 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2147 for (i = 0; i < 4; i++) {
2148 texel[i] = bld->bld_base.base.undef;
2149 }
2150 return;
2151 }
2152
2153 switch (inst->Texture.Texture) {
2154 case TGSI_TEXTURE_1D_ARRAY:
2155 layer_coord = 1;
2156 /* fallthrough */
2157 case TGSI_TEXTURE_1D:
2158 num_offsets = 1;
2159 num_derivs = 1;
2160 break;
2161 case TGSI_TEXTURE_2D_ARRAY:
2162 layer_coord = 2;
2163 /* fallthrough */
2164 case TGSI_TEXTURE_2D:
2165 case TGSI_TEXTURE_RECT:
2166 num_offsets = 2;
2167 num_derivs = 2;
2168 break;
2169 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2170 layer_coord = 1;
2171 /* fallthrough */
2172 case TGSI_TEXTURE_SHADOW1D:
2173 shadow_coord = 2;
2174 num_offsets = 1;
2175 num_derivs = 1;
2176 break;
2177 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2178 layer_coord = 2;
2179 shadow_coord = 3;
2180 num_offsets = 2;
2181 num_derivs = 2;
2182 break;
2183 case TGSI_TEXTURE_SHADOW2D:
2184 case TGSI_TEXTURE_SHADOWRECT:
2185 shadow_coord = 2;
2186 num_offsets = 2;
2187 num_derivs = 2;
2188 break;
2189 case TGSI_TEXTURE_CUBE:
2190 num_offsets = 2;
2191 num_derivs = 3;
2192 break;
2193 case TGSI_TEXTURE_3D:
2194 num_offsets = 3;
2195 num_derivs = 3;
2196 break;
2197 case TGSI_TEXTURE_SHADOWCUBE:
2198 shadow_coord = 3;
2199 num_offsets = 2;
2200 num_derivs = 3;
2201 break;
2202 case TGSI_TEXTURE_CUBE_ARRAY:
2203 num_offsets = 2;
2204 num_derivs = 3;
2205 layer_coord = 3;
2206 break;
2207 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2208 num_offsets = 2;
2209 num_derivs = 3;
2210 layer_coord = 3;
2211       shadow_coord = 4; /* shadow coord comes from a separate reg */
2212 break;
2213 case TGSI_TEXTURE_2D_MSAA:
2214 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2215 default:
2216 assert(0);
2217 return;
2218 }
2219
2220 /* Note lod and especially projected are illegal in a LOT of cases */
2221 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2222 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2223 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2224 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2225 /* note that shadow cube array with bias/explicit lod does not exist */
2226 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2227 }
2228 else {
2229 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2230 }
2231 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2232 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2233 }
2234 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2235 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2236 }
2237 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2238 }
2239
2240 if (sampler_op == LP_SAMPLER_OP_GATHER) {
2241 uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2242 sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
2243 }
2244 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2245 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2246 oow = lp_build_rcp(&bld->bld_base.base, oow);
2247 }
2248
2249 for (i = 0; i < num_derivs; i++) {
2250 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2251 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2252 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2253 }
2254 for (i = num_derivs; i < 5; i++) {
2255 coords[i] = bld->bld_base.base.undef;
2256 }
2257
2258 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2259 if (layer_coord) {
2260 if (layer_coord == 3) {
2261 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2262 }
2263 else {
2264 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2265 }
2266 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2267 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2268 }
2269    /* Shadow coord always occupies the 5th slot. */
2270 if (shadow_coord) {
2271 sample_key |= LP_SAMPLER_SHADOW;
2272 if (shadow_coord == 4) {
2273 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2274 }
2275 else {
2276 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2277 }
2278 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2279 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2280 }
2281
2282 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2283 unsigned dim;
2284 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2285 for (dim = 0; dim < num_derivs; ++dim) {
2286 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2287 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2288 }
2289 params.derivs = &derivs;
2290 /*
2291        * We could also check whether all src regs are constant, but I
2292        * doubt such cases exist in practice.
2293 */
2294 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2295 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2296 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2297 }
2298 else {
2299 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2300 }
2301 }
2302 else {
2303 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2304 }
2305 }
2306 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2307
2308    /* We don't handle the 4-offset version of tg4. */
2309 if (inst->Texture.NumOffsets == 1) {
2310 unsigned dim;
2311 sample_key |= LP_SAMPLER_OFFSETS;
2312 for (dim = 0; dim < num_offsets; dim++) {
2313 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2314 }
2315 }
2316
2317 params.type = bld->bld_base.base.type;
2318 params.sample_key = sample_key;
2319 params.texture_index = unit;
2320 params.sampler_index = unit;
2321 params.context_ptr = bld->context_ptr;
2322 params.thread_data_ptr = bld->thread_data_ptr;
2323 params.coords = coords;
2324 params.offsets = offsets;
2325 params.lod = lod;
2326 params.texel = texel;
2327
2328 bld->sampler->emit_tex_sample(bld->sampler,
2329 bld->bld_base.base.gallivm,
2330 &params);
2331 }
2332
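/**
 * Translate the SM4-style SAMPLE* opcodes.  Unlike the TEX-style opcodes
 * above these take separate texture and sampler units, and the result may
 * get swizzled according to the swizzle on the sampler view operand.
 */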
2333 static void
2334 emit_sample(struct lp_build_tgsi_soa_context *bld,
2335 const struct tgsi_full_instruction *inst,
2336 enum lp_build_tex_modifier modifier,
2337 boolean compare,
2338 enum lp_sampler_op_type sample_type,
2339 LLVMValueRef *texel)
2340 {
2341 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2342 unsigned texture_unit, sampler_unit;
2343 LLVMValueRef lod = NULL;
2344 LLVMValueRef coords[5];
2345 LLVMValueRef offsets[3] = { NULL };
2346 struct lp_derivatives derivs;
2347 struct lp_sampler_params params;
2348 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2349
2350 unsigned num_offsets, num_derivs, i;
2351 unsigned layer_coord = 0;
2352 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2353
2354 memset(&params, 0, sizeof(params));
2355
2356 if (!bld->sampler) {
2357 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2358 for (i = 0; i < 4; i++) {
2359 texel[i] = bld->bld_base.base.undef;
2360 }
2361 return;
2362 }
2363
2364 /*
2365     * Unlike old-style tex opcodes, the texture/sampler indices
2366     * always come from src1 and src2, respectively.
2367 */
2368 texture_unit = inst->Src[1].Register.Index;
2369 sampler_unit = inst->Src[2].Register.Index;
2370
2371 /*
2372     * Note inst->Texture.Texture will contain the number of offsets;
2373     * however, the target information is NOT there and comes from the
2374     * declared sampler views instead.
2375 */
2376 switch (bld->sv[texture_unit].Resource) {
2377 case TGSI_TEXTURE_1D:
2378 num_offsets = 1;
2379 num_derivs = 1;
2380 break;
2381 case TGSI_TEXTURE_1D_ARRAY:
2382 layer_coord = 1;
2383 num_offsets = 1;
2384 num_derivs = 1;
2385 break;
2386 case TGSI_TEXTURE_2D:
2387 case TGSI_TEXTURE_RECT:
2388 num_offsets = 2;
2389 num_derivs = 2;
2390 break;
2391 case TGSI_TEXTURE_2D_ARRAY:
2392 layer_coord = 2;
2393 num_offsets = 2;
2394 num_derivs = 2;
2395 break;
2396 case TGSI_TEXTURE_CUBE:
2397 num_offsets = 2;
2398 num_derivs = 3;
2399 break;
2400 case TGSI_TEXTURE_3D:
2401 num_offsets = 3;
2402 num_derivs = 3;
2403 break;
2404 case TGSI_TEXTURE_CUBE_ARRAY:
2405 layer_coord = 3;
2406 num_offsets = 2;
2407 num_derivs = 3;
2408 break;
2409 default:
2410 assert(0);
2411 return;
2412 }
2413
2414 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2415 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2416 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2417 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2418 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2419 }
2420 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2421 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2422 }
2423 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2424 }
2425 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2426 /* XXX might be better to explicitly pass the level zero information */
2427 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2428 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2429 }
2430
2431 for (i = 0; i < num_derivs; i++) {
2432 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2433 }
2434 for (i = num_derivs; i < 5; i++) {
2435 coords[i] = bld->bld_base.base.undef;
2436 }
2437
2438 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2439 if (layer_coord) {
2440 if (layer_coord == 3)
2441 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2442 else
2443 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2444 }
2445    /* Shadow coord always occupies the 5th slot. */
2446 if (compare) {
2447 sample_key |= LP_SAMPLER_SHADOW;
2448 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2449 }
2450
2451 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2452 unsigned dim;
2453 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2454 for (dim = 0; dim < num_derivs; ++dim) {
2455 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2456 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2457 }
2458 params.derivs = &derivs;
2459 /*
2460        * We could also check whether all src regs are constant, but I
2461        * doubt such cases exist in practice.
2462 */
2463 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2464 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2465 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2466 }
2467 else {
2468 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2469 }
2470 }
2471 else {
2472 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2473 }
2474 }
2475
2476    /* Some advanced gather instructions (txgo) would require 4 offsets. */
2477 if (inst->Texture.NumOffsets == 1) {
2478 unsigned dim;
2479 sample_key |= LP_SAMPLER_OFFSETS;
2480 for (dim = 0; dim < num_offsets; dim++) {
2481 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2482 }
2483 }
2484 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2485
2486 params.type = bld->bld_base.base.type;
2487 params.sample_key = sample_key;
2488 params.texture_index = texture_unit;
2489 params.sampler_index = sampler_unit;
2490 params.context_ptr = bld->context_ptr;
2491 params.thread_data_ptr = bld->thread_data_ptr;
2492 params.coords = coords;
2493 params.offsets = offsets;
2494 params.lod = lod;
2495 params.texel = texel;
2496
2497 bld->sampler->emit_tex_sample(bld->sampler,
2498 bld->bld_base.base.gallivm,
2499 &params);
2500
2501 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2502 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2503 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2504 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2505 unsigned char swizzles[4];
2506 swizzles[0] = inst->Src[1].Register.SwizzleX;
2507 swizzles[1] = inst->Src[1].Register.SwizzleY;
2508 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2509 swizzles[3] = inst->Src[1].Register.SwizzleW;
2510
2511 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2512 }
2513 }
2514
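/**
 * Translate texel fetches (TXF, or SAMPLE_I when is_samplei is TRUE):
 * integer coords with an explicit lod, except for buffer and msaa
 * targets which have no lod.
 */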
2515 static void
2516 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2517 const struct tgsi_full_instruction *inst,
2518 LLVMValueRef *texel,
2519 boolean is_samplei)
2520 {
2521 unsigned unit, target;
2522 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2523 LLVMValueRef explicit_lod = NULL;
2524 LLVMValueRef coords[5];
2525 LLVMValueRef offsets[3] = { NULL };
2526 struct lp_sampler_params params;
2527 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2528 unsigned dims, i;
2529 unsigned layer_coord = 0;
2530 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2531
2532 memset(&params, 0, sizeof(params));
2533
2534 if (!bld->sampler) {
2535 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2536 for (i = 0; i < 4; i++) {
2537 texel[i] = coord_undef;
2538 }
2539 return;
2540 }
2541
2542 unit = inst->Src[1].Register.Index;
2543
2544 if (is_samplei) {
2545 target = bld->sv[unit].Resource;
2546 }
2547 else {
2548 target = inst->Texture.Texture;
2549 }
2550
2551 switch (target) {
2552 case TGSI_TEXTURE_1D:
2553 case TGSI_TEXTURE_BUFFER:
2554 dims = 1;
2555 break;
2556 case TGSI_TEXTURE_1D_ARRAY:
2557 layer_coord = 1;
2558 dims = 1;
2559 break;
2560 case TGSI_TEXTURE_2D:
2561 case TGSI_TEXTURE_RECT:
2562 case TGSI_TEXTURE_2D_MSAA:
2563 dims = 2;
2564 break;
2565 case TGSI_TEXTURE_2D_ARRAY:
2566 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2567 layer_coord = 2;
2568 dims = 2;
2569 break;
2570 case TGSI_TEXTURE_3D:
2571 dims = 3;
2572 break;
2573 default:
2574 assert(0);
2575 return;
2576 }
2577
2578    /* We always have an explicit lod, except for buffers and msaa targets. */
2579 if (target != TGSI_TEXTURE_BUFFER &&
2580 target != TGSI_TEXTURE_2D_MSAA &&
2581 target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2582 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2583 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2584 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2585 }
2586 /*
2587 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2588 * would be the sample index.
2589 */
2590
2591 for (i = 0; i < dims; i++) {
2592 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2593 }
2594    /* We never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway. */
2595 for (i = dims; i < 5; i++) {
2596 coords[i] = coord_undef;
2597 }
2598 if (layer_coord)
2599 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2600
2601 if (inst->Texture.NumOffsets == 1) {
2602 unsigned dim;
2603 sample_key |= LP_SAMPLER_OFFSETS;
2604 for (dim = 0; dim < dims; dim++) {
2605 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2606 }
2607 }
2608 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2609
2610 params.type = bld->bld_base.base.type;
2611 params.sample_key = sample_key;
2612 params.texture_index = unit;
2613 /*
2614     * The sampler is not actually used; set it to 0 so it won't exceed
2615     * PIPE_MAX_SAMPLERS and trigger assertions with d3d10, where the
2616     * sampler view number can exceed this.
2617 */
2618 params.sampler_index = 0;
2619 params.context_ptr = bld->context_ptr;
2620 params.thread_data_ptr = bld->thread_data_ptr;
2621 params.coords = coords;
2622 params.offsets = offsets;
2623 params.derivs = NULL;
2624 params.lod = explicit_lod;
2625 params.texel = texel;
2626
2627 bld->sampler->emit_tex_sample(bld->sampler,
2628 bld->bld_base.base.gallivm,
2629 &params);
2630
2631 if (is_samplei &&
2632 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2633 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2634 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2635 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2636 unsigned char swizzles[4];
2637 swizzles[0] = inst->Src[1].Register.SwizzleX;
2638 swizzles[1] = inst->Src[1].Register.SwizzleY;
2639 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2640 swizzles[3] = inst->Src[1].Register.SwizzleW;
2641
2642 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2643 }
2644 }
2645
2646 static void
2647 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2648 const struct tgsi_full_instruction *inst,
2649 LLVMValueRef *sizes_out,
2650 boolean is_sviewinfo)
2651 {
2652 LLVMValueRef explicit_lod;
2653 enum lp_sampler_lod_property lod_property;
2654 unsigned has_lod;
2655 unsigned i;
2656 unsigned unit = inst->Src[1].Register.Index;
2657 unsigned target, pipe_target;
2658 struct lp_sampler_size_query_params params;
2659
2660 if (is_sviewinfo) {
2661 target = bld->sv[unit].Resource;
2662 }
2663 else {
2664 target = inst->Texture.Texture;
2665 }
2666 switch (target) {
2667 case TGSI_TEXTURE_BUFFER:
2668 case TGSI_TEXTURE_RECT:
2669 case TGSI_TEXTURE_SHADOWRECT:
2670 has_lod = 0;
2671 break;
2672 default:
2673 has_lod = 1;
2674 break;
2675 }
2676
2677 if (!bld->sampler) {
2678 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2679 for (i = 0; i < 4; i++)
2680 sizes_out[i] = bld->bld_base.int_bld.undef;
2681 return;
2682 }
2683
2684 if (has_lod) {
2685 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2686 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2687 }
2688 else {
2689 explicit_lod = NULL;
2690 lod_property = LP_SAMPLER_LOD_SCALAR;
2691 }
2692
2693
2694 pipe_target = tgsi_to_pipe_tex_target(target);
2695
2696 params.int_type = bld->bld_base.int_bld.type;
2697 params.texture_unit = unit;
2698 params.target = pipe_target;
2699 params.context_ptr = bld->context_ptr;
2700 params.is_sviewinfo = TRUE;
2701 params.lod_property = lod_property;
2702 params.explicit_lod = explicit_lod;
2703 params.sizes_out = sizes_out;
2704
2705 bld->sampler->emit_size_query(bld->sampler,
2706 bld->bld_base.base.gallivm,
2707 &params);
2708 }
2709
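/*
 * Return TRUE if no control flow, texturing or subroutine call can
 * follow before the shader ends.  The kill opcodes use this to skip the
 * mask check when nothing downstream could still benefit from an early
 * exit on a fully-killed mask.
 */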
2710 static boolean
2711 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2712 int pc)
2713 {
2714 unsigned i;
2715
2716 for (i = 0; i < 5; i++) {
2717 enum tgsi_opcode opcode;
2718
2719 if (pc + i >= bld->bld_base.info->num_instructions)
2720 return TRUE;
2721
2722 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2723
2724 if (opcode == TGSI_OPCODE_END)
2725 return TRUE;
2726
2727 if (opcode == TGSI_OPCODE_TEX ||
2728 opcode == TGSI_OPCODE_TXP ||
2729 opcode == TGSI_OPCODE_TXD ||
2730 opcode == TGSI_OPCODE_TXB ||
2731 opcode == TGSI_OPCODE_TXL ||
2732 opcode == TGSI_OPCODE_TXF ||
2733 opcode == TGSI_OPCODE_TXQ ||
2734 opcode == TGSI_OPCODE_TEX2 ||
2735 opcode == TGSI_OPCODE_TXB2 ||
2736 opcode == TGSI_OPCODE_TXL2 ||
2737 opcode == TGSI_OPCODE_SAMPLE ||
2738 opcode == TGSI_OPCODE_SAMPLE_B ||
2739 opcode == TGSI_OPCODE_SAMPLE_C ||
2740 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2741 opcode == TGSI_OPCODE_SAMPLE_D ||
2742 opcode == TGSI_OPCODE_SAMPLE_I ||
2743 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2744 opcode == TGSI_OPCODE_SAMPLE_L ||
2745 opcode == TGSI_OPCODE_SVIEWINFO ||
2746 opcode == TGSI_OPCODE_CAL ||
2747 opcode == TGSI_OPCODE_IF ||
2748 opcode == TGSI_OPCODE_UIF ||
2749 opcode == TGSI_OPCODE_BGNLOOP ||
2750 opcode == TGSI_OPCODE_SWITCH)
2751 return FALSE;
2752 }
2753
2754 return TRUE;
2755 }
2756
2757
2758
2759 /**
2760 * Kill fragment if any of the src register values are negative.
2761 */
2762 static void
2763 emit_kill_if(
2764 struct lp_build_tgsi_soa_context *bld,
2765 const struct tgsi_full_instruction *inst,
2766 int pc)
2767 {
2768 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2769 const struct tgsi_full_src_register *reg = &inst->Src[0];
2770 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2771 LLVMValueRef mask;
2772 unsigned chan_index;
2773
2774 memset(&terms, 0, sizeof terms);
2775
2776 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2777 unsigned swizzle;
2778
2779 /* Unswizzle channel */
2780 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2781
2782       /* Check if the component has not already been tested. */
2783 assert(swizzle < TGSI_NUM_CHANNELS);
2784 if( !terms[swizzle] )
2785 /* TODO: change the comparison operator instead of setting the sign */
2786 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2787 }
2788
2789 mask = NULL;
2790 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2791 if(terms[chan_index]) {
2792 LLVMValueRef chan_mask;
2793
2794 /*
2795 * If term < 0 then mask = 0 else mask = ~0.
2796 */
2797 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2798
2799 if(mask)
2800 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2801 else
2802 mask = chan_mask;
2803 }
2804 }
2805
2806 if (bld->exec_mask.has_mask) {
2807 LLVMValueRef invmask;
2808 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2809 mask = LLVMBuildOr(builder, mask, invmask, "");
2810 }
2811
2812 lp_build_mask_update(bld->mask, mask);
2813 if (!near_end_of_shader(bld, pc))
2814 lp_build_mask_check(bld->mask);
2815 }
2816
2817
2818 /**
2819 * Unconditional fragment kill.
2820 * The only predication is the execution mask which will apply if
2821 * we're inside a loop or conditional.
2822 */
2823 static void
2824 emit_kill(struct lp_build_tgsi_soa_context *bld,
2825 int pc)
2826 {
2827 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2828 LLVMValueRef mask;
2829
2830 /* For those channels which are "alive", disable fragment shader
2831 * execution.
2832 */
2833 if (bld->exec_mask.has_mask) {
2834 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2835 }
2836 else {
2837 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2838 mask = zero;
2839 }
2840
2841 lp_build_mask_update(bld->mask, mask);
2842
2843 if (!near_end_of_shader(bld, pc))
2844 lp_build_mask_check(bld->mask);
2845 }
2846
2847
2848 /**
2849 * Emit code which will dump the value of all the temporary registers
2850 * to stdout.
2851 */
2852 static void
2853 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2854 unsigned file)
2855 {
2856 const struct tgsi_shader_info *info = bld->bld_base.info;
2857 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2858 LLVMBuilderRef builder = gallivm->builder;
2859 LLVMValueRef reg_ptr;
2860 int index;
2861 int max_index = info->file_max[file];
2862
2863 /*
2864 * Some register files, particularly constants, can be very large,
2865 * and dumping everything could make this unusably slow.
2866 */
2867 max_index = MIN2(max_index, 32);
2868
2869 for (index = 0; index <= max_index; index++) {
2870 LLVMValueRef res;
2871 unsigned mask;
2872 int chan;
2873
2874 if (index < 8 * sizeof(unsigned) &&
2875 (info->file_mask[file] & (1u << index)) == 0) {
2876          /* This register was not declared. */
2877 continue;
2878 }
2879
2880 if (file == TGSI_FILE_INPUT) {
2881 mask = info->input_usage_mask[index];
2882 } else {
2883 mask = TGSI_WRITEMASK_XYZW;
2884 }
2885
2886 for (chan = 0; chan < 4; chan++) {
2887 if ((mask & (1 << chan)) == 0) {
2888             /* This channel is not used. */
2889 continue;
2890 }
2891
2892 if (file == TGSI_FILE_CONSTANT) {
2893 struct tgsi_full_src_register reg;
2894 memset(&reg, 0, sizeof reg);
2895 reg.Register.File = file;
2896 reg.Register.Index = index;
2897 reg.Register.SwizzleX = 0;
2898 reg.Register.SwizzleY = 1;
2899 reg.Register.SwizzleZ = 2;
2900 reg.Register.SwizzleW = 3;
2901
2902 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2903 if (!res) {
2904 continue;
2905 }
2906 } else if (file == TGSI_FILE_INPUT) {
2907 res = bld->inputs[index][chan];
2908 if (!res) {
2909 continue;
2910 }
2911 } else if (file == TGSI_FILE_TEMPORARY) {
2912 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2913 assert(reg_ptr);
2914 res = LLVMBuildLoad(builder, reg_ptr, "");
2915 } else if (file == TGSI_FILE_OUTPUT) {
2916 reg_ptr = lp_get_output_ptr(bld, index, chan);
2917 assert(reg_ptr);
2918 res = LLVMBuildLoad(builder, reg_ptr, "");
2919 } else {
2920 assert(0);
2921 continue;
2922 }
2923
2924 emit_dump_reg(gallivm, file, index, chan, res);
2925 }
2926 }
2927 }
2928
2929
2930
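/*
 * Emit code for a TGSI declaration: allocate per-channel allocas for
 * temporaries, outputs and address registers (files accessed indirectly
 * instead live in flat arrays set up elsewhere), record sampler view
 * targets, and fetch constant buffer and SSBO pointers and sizes.
 */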
2931 void
2932 lp_emit_declaration_soa(
2933 struct lp_build_tgsi_context *bld_base,
2934 const struct tgsi_full_declaration *decl)
2935 {
2936 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2937 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2938 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2939 const unsigned first = decl->Range.First;
2940 const unsigned last = decl->Range.Last;
2941 unsigned idx, i;
2942
2943 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2944
2945 switch (decl->Declaration.File) {
2946 case TGSI_FILE_TEMPORARY:
2947 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2948 assert(last < LP_MAX_INLINED_TEMPS);
2949 for (idx = first; idx <= last; ++idx) {
2950 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2951 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2952 }
2953 }
2954 break;
2955
2956 case TGSI_FILE_OUTPUT:
2957 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2958 for (idx = first; idx <= last; ++idx) {
2959 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2960 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2961 vec_type, "output");
2962 }
2963 }
2964 break;
2965
2966 case TGSI_FILE_ADDRESS:
2967 /* ADDR registers are only allocated with an integer LLVM IR type,
2968        * as they are guaranteed to always hold integers.
2969 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2970 * an ADDR register for that matter).
2971 */
2972 assert(last < LP_MAX_TGSI_ADDRS);
2973 for (idx = first; idx <= last; ++idx) {
2974 assert(idx < LP_MAX_TGSI_ADDRS);
2975 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2976 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2977 }
2978 break;
2979
2980 case TGSI_FILE_SAMPLER_VIEW:
2981 /*
2982        * The target stored here MUST match whatever is actually
2983        * in the bound sampler views (what about the return type?).
2984 */
2985 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2986 for (idx = first; idx <= last; ++idx) {
2987 bld->sv[idx] = decl->SamplerView;
2988 }
2989 break;
2990
2991 case TGSI_FILE_CONSTANT:
2992 {
2993 /*
2994 * We could trivially fetch the per-buffer pointer when fetching the
2995 * constant, relying on llvm to figure out it's always the same pointer
2996 * anyway. However, doing so results in a huge (more than factor of 10)
2997 * slowdown in llvm compilation times for some (but not all) shaders
2998 * (more specifically, the IR optimization spends way more time in
2999 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
3000 */
3001 unsigned idx2D = decl->Dim.Index2D;
3002 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
3003 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
3004 bld->consts[idx2D] =
3005 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
3006 bld->consts_sizes[idx2D] =
3007 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
3008 }
3009 break;
3010 case TGSI_FILE_BUFFER:
3011 {
3012 unsigned idx = decl->Range.First;
3013 LLVMValueRef index = lp_build_const_int32(gallivm, idx);
3014 assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
3015 bld->ssbos[idx] =
3016 lp_build_array_get(gallivm, bld->ssbo_ptr, index);
3017 bld->ssbo_sizes[idx] =
3018 lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
3019
3020 }
3021 break;
3022 case TGSI_FILE_MEMORY:
3023 break;
3024 default:
3025 /* don't need to declare other vars */
3026 break;
3027 }
3028 }
3029
3030
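/*
 * Emit a TGSI immediate: each component becomes a constant vector
 * broadcast across the SoA lanes (bitcast to float for uniform storage),
 * kept in the inline immediates[] array and/or written to the imms_array
 * backing store when immediates are addressed indirectly.
 */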
3031 void lp_emit_immediate_soa(
3032 struct lp_build_tgsi_context *bld_base,
3033 const struct tgsi_full_immediate *imm)
3034 {
3035 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3036 struct gallivm_state * gallivm = bld_base->base.gallivm;
3037 LLVMValueRef imms[4];
3038 unsigned i;
3039 const uint size = imm->Immediate.NrTokens - 1;
3040 assert(size <= 4);
3041 switch (imm->Immediate.DataType) {
3042 case TGSI_IMM_FLOAT32:
3043 for( i = 0; i < size; ++i )
3044 imms[i] =
3045 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3046
3047 break;
3048 case TGSI_IMM_FLOAT64:
3049 case TGSI_IMM_UINT64:
3050 case TGSI_IMM_INT64:
3051 case TGSI_IMM_UINT32:
3052 for( i = 0; i < size; ++i ) {
3053 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3054 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3055 }
3056
3057 break;
3058 case TGSI_IMM_INT32:
3059 for( i = 0; i < size; ++i ) {
3060 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3061 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3062 }
3063
3064 break;
3065 }
3066 for( i = size; i < 4; ++i )
3067 imms[i] = bld_base->base.undef;
3068
3069 if (bld->use_immediates_array) {
3070 unsigned index = bld->num_immediates;
3071 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3072 LLVMBuilderRef builder = gallivm->builder;
3073 LLVMValueRef gep[2];
3074 gep[0] = lp_build_const_int32(gallivm, 0);
3075
3076 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3077 for (i = 0; i < 4; ++i ) {
3078 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3079 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3080 bld->imms_array, gep, 2, "");
3081 LLVMBuildStore(builder, imms[i], imm_ptr);
3082 }
3083 } else {
3084 /* simply copy the immediate values into the next immediates[] slot */
3085 unsigned i;
3086 assert(imm->Immediate.NrTokens - 1 <= 4);
3087 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3088
3089 for(i = 0; i < 4; ++i )
3090 bld->immediates[bld->num_immediates][i] = imms[i];
3091
3092 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3093 unsigned index = bld->num_immediates;
3094 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3095 LLVMBuilderRef builder = gallivm->builder;
3096 LLVMValueRef gep[2];
3097 gep[0] = lp_build_const_int32(gallivm, 0);
3098 for (i = 0; i < 4; ++i ) {
3099 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3100 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3101 bld->imms_array, gep, 2, "");
3102 LLVMBuildStore(builder,
3103 bld->immediates[index][i],
3104 imm_ptr);
3105 }
3106 }
3107 }
3108
3109 bld->num_immediates++;
3110 }
3111
3112 static void
3113 ddx_emit(
3114 const struct lp_build_tgsi_action * action,
3115 struct lp_build_tgsi_context * bld_base,
3116 struct lp_build_emit_data * emit_data)
3117 {
3118 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3119
3120 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3121 &emit_data->output[emit_data->chan], NULL);
3122 }
3123
3124 static void
3125 ddy_emit(
3126 const struct lp_build_tgsi_action * action,
3127 struct lp_build_tgsi_context * bld_base,
3128 struct lp_build_emit_data * emit_data)
3129 {
3130 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3131
3132 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3133 &emit_data->output[emit_data->chan]);
3134 }
3135
3136 static void
3137 kill_emit(
3138 const struct lp_build_tgsi_action * action,
3139 struct lp_build_tgsi_context * bld_base,
3140 struct lp_build_emit_data * emit_data)
3141 {
3142 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3143
3144 emit_kill(bld, bld_base->pc - 1);
3145 }
3146
3147 static void
3148 kill_if_emit(
3149 const struct lp_build_tgsi_action * action,
3150 struct lp_build_tgsi_context * bld_base,
3151 struct lp_build_emit_data * emit_data)
3152 {
3153 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3154
3155 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3156 }
3157
3158 static void
3159 tex_emit(
3160 const struct lp_build_tgsi_action * action,
3161 struct lp_build_tgsi_context * bld_base,
3162 struct lp_build_emit_data * emit_data)
3163 {
3164 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3165
3166 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3167 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3168 }
3169
3170 static void
3171 tex2_emit(
3172 const struct lp_build_tgsi_action * action,
3173 struct lp_build_tgsi_context * bld_base,
3174 struct lp_build_emit_data * emit_data)
3175 {
3176 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3177
3178 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3179 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3180 }
3181
3182 static void
3183 txb_emit(
3184 const struct lp_build_tgsi_action * action,
3185 struct lp_build_tgsi_context * bld_base,
3186 struct lp_build_emit_data * emit_data)
3187 {
3188 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3189
3190 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3191 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3192 }
3193
3194 static void
3195 txb2_emit(
3196 const struct lp_build_tgsi_action * action,
3197 struct lp_build_tgsi_context * bld_base,
3198 struct lp_build_emit_data * emit_data)
3199 {
3200 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3201
3202 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3203 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3204 }
3205
3206 static void
3207 txd_emit(
3208 const struct lp_build_tgsi_action * action,
3209 struct lp_build_tgsi_context * bld_base,
3210 struct lp_build_emit_data * emit_data)
3211 {
3212 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3213
3214 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3215 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3216 }
3217
3218 static void
3219 txl_emit(
3220 const struct lp_build_tgsi_action * action,
3221 struct lp_build_tgsi_context * bld_base,
3222 struct lp_build_emit_data * emit_data)
3223 {
3224 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3225
3226 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3227 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3228 }
3229
3230 static void
3231 txl2_emit(
3232 const struct lp_build_tgsi_action * action,
3233 struct lp_build_tgsi_context * bld_base,
3234 struct lp_build_emit_data * emit_data)
3235 {
3236 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3237
3238 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3239 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3240 }
3241
3242 static void
3243 txp_emit(
3244 const struct lp_build_tgsi_action * action,
3245 struct lp_build_tgsi_context * bld_base,
3246 struct lp_build_emit_data * emit_data)
3247 {
3248 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3249
3250 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3251 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3252 }
3253
3254 static void
3255 tg4_emit(
3256 const struct lp_build_tgsi_action * action,
3257 struct lp_build_tgsi_context * bld_base,
3258 struct lp_build_emit_data * emit_data)
3259 {
3260 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3261
3262 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3263 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3264 }
3265
3266 static void
3267 lodq_emit(
3268 const struct lp_build_tgsi_action * action,
3269 struct lp_build_tgsi_context * bld_base,
3270 struct lp_build_emit_data * emit_data)
3271 {
3272 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3273
3274 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3275 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3276 }
3277
3278 static void
3279 txq_emit(
3280 const struct lp_build_tgsi_action * action,
3281 struct lp_build_tgsi_context * bld_base,
3282 struct lp_build_emit_data * emit_data)
3283 {
3284 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3285
3286 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3287 }
3288
3289 static void
3290 txf_emit(
3291 const struct lp_build_tgsi_action * action,
3292 struct lp_build_tgsi_context * bld_base,
3293 struct lp_build_emit_data * emit_data)
3294 {
3295 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3296
3297 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3298 }
3299
3300 static void
3301 sample_i_emit(
3302 const struct lp_build_tgsi_action * action,
3303 struct lp_build_tgsi_context * bld_base,
3304 struct lp_build_emit_data * emit_data)
3305 {
3306 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3307
3308 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3309 }
3310
3311 static void
3312 sample_emit(
3313 const struct lp_build_tgsi_action * action,
3314 struct lp_build_tgsi_context * bld_base,
3315 struct lp_build_emit_data * emit_data)
3316 {
3317 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3318
3319 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3320 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3321 }
3322
3323 static void
3324 sample_b_emit(
3325 const struct lp_build_tgsi_action * action,
3326 struct lp_build_tgsi_context * bld_base,
3327 struct lp_build_emit_data * emit_data)
3328 {
3329 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3330
3331 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3332 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3333 }
3334
3335 static void
3336 sample_c_emit(
3337 const struct lp_build_tgsi_action * action,
3338 struct lp_build_tgsi_context * bld_base,
3339 struct lp_build_emit_data * emit_data)
3340 {
3341 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3342
3343 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3344 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3345 }
3346
3347 static void
3348 sample_c_lz_emit(
3349 const struct lp_build_tgsi_action * action,
3350 struct lp_build_tgsi_context * bld_base,
3351 struct lp_build_emit_data * emit_data)
3352 {
3353 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3354
3355 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3356 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3357 }
3358
3359 static void
3360 sample_d_emit(
3361 const struct lp_build_tgsi_action * action,
3362 struct lp_build_tgsi_context * bld_base,
3363 struct lp_build_emit_data * emit_data)
3364 {
3365 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3366
3367 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3368 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3369 }
3370
3371 static void
3372 sample_l_emit(
3373 const struct lp_build_tgsi_action * action,
3374 struct lp_build_tgsi_context * bld_base,
3375 struct lp_build_emit_data * emit_data)
3376 {
3377 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3378
3379 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3380 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3381 }
3382
3383 static void
3384 gather4_emit(
3385 const struct lp_build_tgsi_action * action,
3386 struct lp_build_tgsi_context * bld_base,
3387 struct lp_build_emit_data * emit_data)
3388 {
3389 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3390
3391 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3392 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3393 }
3394
3395 static void
3396 sviewinfo_emit(
3397 const struct lp_build_tgsi_action * action,
3398 struct lp_build_tgsi_context * bld_base,
3399 struct lp_build_emit_data * emit_data)
3400 {
3401 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3402
3403 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3404 }
3405
3406 static void
3407 lod_emit(
3408 const struct lp_build_tgsi_action * action,
3409 struct lp_build_tgsi_context * bld_base,
3410 struct lp_build_emit_data * emit_data)
3411 {
3412 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3413
3414 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3415 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3416 }
3417
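/*
 * Map a TGSI texture target to its coordinate dimension count and to the
 * coord slot (if any) that holds the array layer.
 */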
3418 static void target_to_dims_layer(unsigned target,
3419 unsigned *dims,
3420 unsigned *layer_coord)
3421 {
3422 *layer_coord = 0;
3423 switch (target) {
3424 case TGSI_TEXTURE_1D:
3425 case TGSI_TEXTURE_BUFFER:
3426 *dims = 1;
3427 break;
3428 case TGSI_TEXTURE_1D_ARRAY:
3429 *layer_coord = 1;
3430 *dims = 1;
3431 break;
3432 case TGSI_TEXTURE_2D:
3433 case TGSI_TEXTURE_RECT:
3434 *dims = 2;
3435 break;
3436 case TGSI_TEXTURE_2D_ARRAY:
3437 *layer_coord = 2;
3438 *dims = 2;
3439 break;
3440 case TGSI_TEXTURE_3D:
3441 case TGSI_TEXTURE_CUBE:
3442 case TGSI_TEXTURE_CUBE_ARRAY:
3443 *dims = 3;
3444 break;
3445 default:
3446 assert(0);
3447 return;
3448 }
3449 }
3450
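/*
 * Image load: fetch the texel coords from src1, place the layer index in
 * its expected slot, and forward everything to the image code generator
 * as an LP_IMG_LOAD op.
 */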
3451 static void
3452 img_load_emit(
3453 const struct lp_build_tgsi_action * action,
3454 struct lp_build_tgsi_context * bld_base,
3455 struct lp_build_emit_data * emit_data)
3456 {
3457 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3458 struct lp_img_params params;
3459 LLVMValueRef coords[5];
3460 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3461 unsigned dims;
3462 unsigned target = emit_data->inst->Memory.Texture;
3463 unsigned layer_coord;
3464
3465 target_to_dims_layer(target, &dims, &layer_coord);
3466
3467 for (unsigned i = 0; i < dims; i++) {
3468 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3469 }
3470 for (unsigned i = dims; i < 5; i++) {
3471 coords[i] = coord_undef;
3472 }
3473 if (layer_coord)
3474 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3475
3476 memset(&params, 0, sizeof(params));
3477
3478 params.type = bld->bld_base.base.type;
3479 params.context_ptr = bld->context_ptr;
3480 params.thread_data_ptr = bld->thread_data_ptr;
3481 params.coords = coords;
3482 params.outdata = emit_data->output;
3483 params.target = tgsi_to_pipe_tex_target(target);
3484 params.image_index = emit_data->inst->Src[0].Register.Index;
3485 params.img_op = LP_IMG_LOAD;
3486 bld->image->emit_op(bld->image,
3487 bld->bld_base.base.gallivm,
3488 &params);
3489 }
3490
static void
load_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
   unsigned buf = bufreg->Register.Index;
   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   if (bufreg->Register.File == TGSI_FILE_IMAGE)
      img_load_emit(action, bld_base, emit_data);
   else if (0) {
      /* for indirect support with ARB_gpu_shader5 */
   } else {
      LLVMValueRef index;
      LLVMValueRef scalar, scalar_ptr;
      unsigned chan_index;

      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
      index = lp_build_shr_imm(uint_bld, index, 2);

      scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];

      LLVMValueRef ssbo_limit;

      if (!is_shared) {
         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      }

      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         LLVMValueRef exec_mask = mask_vec(bld_base);
         if (!is_shared) {
            LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
            exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
         }

         LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
         struct lp_build_loop_state loop_state;
         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

         struct lp_build_if_state ifthen;
         LLVMValueRef cond, temp_res;

         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                              loop_state.counter, "");

         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");

         lp_build_if(&ifthen, gallivm, cond);
         scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);

         temp_res = LLVMBuildLoad(builder, result, "");
         temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
         LLVMBuildStore(builder, temp_res, result);
         lp_build_else(&ifthen);
         temp_res = LLVMBuildLoad(builder, result, "");
         temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
         LLVMBuildStore(builder, temp_res, result);
         lp_build_endif(&ifthen);
         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                                NULL, LLVMIntUGE);
         emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
      }
   }
}

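/*
 * Emit an image STORE: fetch the coordinates from Src[0] and the value
 * to store from Src[1]; the image itself is Dst[0]. The store is done
 * by the image backend under the current execution mask.
 */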
static void
img_store_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct lp_img_params params;
   LLVMValueRef coords[5];
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   unsigned dims;
   unsigned target = emit_data->inst->Memory.Texture;
   unsigned layer_coord;

   target_to_dims_layer(target, &dims, &layer_coord);
   for (unsigned i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
   }
   for (unsigned i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
   memset(&params, 0, sizeof(params));

   params.type = bld->bld_base.base.type;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.outdata = NULL;
   params.exec_mask = mask_vec(bld_base);
   params.target = tgsi_to_pipe_tex_target(target);
   params.image_index = emit_data->inst->Dst[0].Register.Index;
   params.img_op = LP_IMG_STORE;
   for (unsigned i = 0; i < 4; i++)
      params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);

   bld->image->emit_op(bld->image,
                       bld->bld_base.base.gallivm,
                       &params);
}

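/*
 * Emit a TGSI STORE. Image stores go through img_store_emit(); SSBO and
 * shared-memory stores mirror load_emit(): per enabled channel, loop
 * over the lanes and store one dword wherever the execution mask (and,
 * for SSBOs, the bounds check) allows.
 */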
static void
store_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
   unsigned buf = bufreg->Register.Index;
   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      img_store_emit(action, bld_base, emit_data);
   } else if (0) {

   } else {
      LLVMValueRef index; /* dword index into the ssbo/shared buffer */
      LLVMValueRef scalar_ptr;
      LLVMValueRef value;
      unsigned chan_index;

      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
      index = lp_build_shr_imm(uint_bld, index, 2);

      scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];

      LLVMValueRef ssbo_limit;

      if (!is_shared) {
         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      }

      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);

         LLVMValueRef exec_mask = mask_vec(bld_base);
         if (!is_shared) {
            LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
            exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
         }

         struct lp_build_loop_state loop_state;
         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

         LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
                                                          loop_state.counter, "");
         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");

         struct lp_build_if_state ifthen;
         LLVMValueRef cond;

         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                              loop_state.counter, "");

         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
         lp_build_if(&ifthen, gallivm, cond);

         lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);

         lp_build_endif(&ifthen);
         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                                NULL, LLVMIntUGE);
      }
   }
}

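/*
 * Emit RESQ: images get a size query through the image backend;
 * for buffers, the stored SSBO size is broadcast across the result
 * vector.
 */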
static void
resq_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];

   unsigned buf = bufreg->Register.Index;
   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      unsigned target = emit_data->inst->Memory.Texture;
      struct lp_sampler_size_query_params params = { 0 };
      params.int_type = bld->bld_base.int_bld.type;
      params.texture_unit = buf;
      params.target = tgsi_to_pipe_tex_target(target);
      params.context_ptr = bld->context_ptr;
      params.sizes_out = emit_data->output;

      bld->image->emit_size_query(bld->image,
                                  bld->bld_base.base.gallivm,
                                  &params);
   } else {
      LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];

      emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
   }
}

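/*
 * Emit an image atomic: coordinates come from Src[1], the data operand
 * from Src[2] and, for ATOMCAS, a second data operand from Src[3]; the
 * image backend performs the read-modify-write and returns the old
 * value in emit_data->output.
 */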
static void
img_atomic_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data,
   LLVMAtomicRMWBinOp op)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct lp_img_params params;
   LLVMValueRef coords[5];
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   unsigned dims;
   unsigned layer_coord;
   unsigned target = emit_data->inst->Memory.Texture;

   target_to_dims_layer(target, &dims, &layer_coord);

   for (unsigned i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
   }
   for (unsigned i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
   memset(&params, 0, sizeof(params));

   params.type = bld->bld_base.base.type;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.exec_mask = mask_vec(bld_base);
   params.image_index = emit_data->inst->Src[0].Register.Index;
   params.coords = coords;
   params.target = tgsi_to_pipe_tex_target(target);
   params.op = op;
   params.outdata = emit_data->output;
   params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;

   for (unsigned i = 0; i < 4; i++)
      params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
   if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
      for (unsigned i = 0; i < 4; i++)
         params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
   }
   bld->image->emit_op(bld->image,
                       bld->bld_base.base.gallivm,
                       &params);
}

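/*
 * Emit a buffer/shared-memory atomic. The TGSI opcode is mapped to the
 * corresponding LLVMAtomicRMWBinOp (ATOMCAS is handled separately via
 * cmpxchg); image atomics are forwarded to img_atomic_emit(). The
 * memory operation itself is scalarized: one atomicrmw/cmpxchg per
 * active lane, with inactive lanes yielding zero.
 */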
static void
atomic_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];

   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
   unsigned buf = bufreg->Register.Index;
   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;

   LLVMAtomicRMWBinOp op;
   switch (emit_data->inst->Instruction.Opcode) {
   case TGSI_OPCODE_ATOMUADD:
      op = LLVMAtomicRMWBinOpAdd;
      break;
   case TGSI_OPCODE_ATOMXCHG:
      op = LLVMAtomicRMWBinOpXchg;
      break;
   case TGSI_OPCODE_ATOMAND:
      op = LLVMAtomicRMWBinOpAnd;
      break;
   case TGSI_OPCODE_ATOMOR:
      op = LLVMAtomicRMWBinOpOr;
      break;
   case TGSI_OPCODE_ATOMXOR:
      op = LLVMAtomicRMWBinOpXor;
      break;
   case TGSI_OPCODE_ATOMUMIN:
      op = LLVMAtomicRMWBinOpUMin;
      break;
   case TGSI_OPCODE_ATOMUMAX:
      op = LLVMAtomicRMWBinOpUMax;
      break;
   case TGSI_OPCODE_ATOMIMIN:
      op = LLVMAtomicRMWBinOpMin;
      break;
   case TGSI_OPCODE_ATOMIMAX:
      op = LLVMAtomicRMWBinOpMax;
      break;
   case TGSI_OPCODE_ATOMCAS:
      break;
   default:
      assert(0);
      return;
   }

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      img_atomic_emit(action, bld_base, emit_data, op);
   } else if (0) {
   } else {
      LLVMValueRef index; /* dword index into the ssbo/shared buffer */
      LLVMValueRef scalar, scalar_ptr;
      LLVMValueRef value;

      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
      value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);

      index = lp_build_shr_imm(uint_bld, index, 2);

      if (!is_shared) {
         index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
         scalar_ptr = bld->ssbos[buf];
      } else
         scalar_ptr = bld->shared_ptr;

      LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                              uint_bld->vec_type, "");

      LLVMValueRef ssbo_limit;
      if (!is_shared) {
         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      }

      LLVMValueRef exec_mask = mask_vec(bld_base);

      if (!is_shared) {
         LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
         exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
      }

      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
                                                       loop_state.counter, "");
      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");

      index = LLVMBuildExtractElement(gallivm->builder, index,
                                      loop_state.counter, "");

      scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
                                &index, 1, "");

      struct lp_build_if_state ifthen;
      LLVMValueRef cond, temp_res;

      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
      lp_build_if(&ifthen, gallivm, cond);

      if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
         LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
         LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
                                                            loop_state.counter, "");
         cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
         scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
                                         cas_src_ptr,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         false);
         scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
      } else {
         scalar = LLVMBuildAtomicRMW(builder, op,
                                     scalar_ptr, value_ptr,
                                     LLVMAtomicOrderingSequentiallyConsistent,
                                     false);
      }
      temp_res = LLVMBuildLoad(builder, atom_res, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_else(&ifthen);
      temp_res = LLVMBuildLoad(builder, atom_res, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_endif(&ifthen);

      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
      emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
   }
}

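/*
 * Emit BARRIER: suspend the current coroutine so the other invocations
 * of the workgroup run up to the same point, then resume here. MEMBAR
 * below is simply a sequentially-consistent fence.
 */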
static void
barrier_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");

   lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
   LLVMPositionBuilderAtEnd(gallivm->builder, resume);
}

static void
membar_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
}

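/*
 * Increment the counters of the active lanes. Execution masks are
 * all-ones (i.e. -1) in active lanes and zero elsewhere, so subtracting
 * the mask adds exactly 1 to each active lane.
 */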
static void
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
                          LLVMValueRef ptr,
                          LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");

   current_vec = LLVMBuildSub(builder, current_vec, mask, "");

   LLVMBuildStore(builder, current_vec, ptr);
}

static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
                             LLVMValueRef ptr,
                             LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");

   current_vec = lp_build_select(&bld_base->uint_bld,
                                 mask,
                                 bld_base->uint_bld.zero,
                                 current_vec);

   LLVMBuildStore(builder, current_vec, ptr);
}

static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
                                  LLVMValueRef current_mask_vec,
                                  LLVMValueRef total_emitted_vertices_vec)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
                                        total_emitted_vertices_vec,
                                        bld->max_output_vertices_vec);

   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
}

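/*
 * Emit a GS EMIT: clamp the execution mask so lanes that already
 * reached the declared maximum vertex count stop emitting, gather the
 * current outputs and pass them to the geometry shader interface, then
 * bump the per-lane vertex counters.
 */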
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}


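/*
 * Finish the primitives of the lanes selected by 'mask': count a
 * primitive only where vertices have actually been emitted, then reset
 * those lanes' emitted-vertex counters. Also used by the epilogue,
 * where only the shader mask (not the exec mask) is valid.
 */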
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* Combine the current execution mask with the mask telling us
         which, if any, execution slots actually have unemitted
         primitives; this way we make sure that end_primitive executes
         only on the paths that have unflushed vertices. */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }
}

static void
end_primitive(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->gs_iface->end_primitive) {
      LLVMValueRef mask = mask_vec(bld_base);
      end_primitive_masked(bld_base, mask);
   }
}

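/*
 * The control-flow opcode handlers below simply forward to the
 * lp_exec_mask helpers, which maintain the per-lane execution mask.
 */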
static void
cal_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
                     &bld_base->pc);
}

static void
ret_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
}

static void
brk_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_break(&bld->exec_mask, bld_base);
}

static void
if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], bld->bld_base.base.zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
uif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], uint_bld->zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
}

static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_default(&bld->exec_mask, bld_base);
}

static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
}

static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endswitch(&bld->exec_mask, bld_base);
}

static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask);
}

static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}

static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}

static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}

static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}

static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}

static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}

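/*
 * Shader prologue: allocate backing arrays for any register files that
 * are addressed indirectly (temporaries, outputs, immediates, inputs),
 * and for geometry shaders allocate and zero the per-lane vertex and
 * primitive counters.
 */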
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
      bld->temps_array = lp_build_alloca_undef(gallivm,
                                               LLVMArrayType(bld_base->base.vec_type, array_size),
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
      bld->imms_array = lp_build_alloca_undef(gallivm,
                                              LLVMArrayType(bld_base->base.vec_type, array_size),
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs, we need to copy them into
    * our alloca array to be able to iterate over them. */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
                                                     bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      assert(bld_base->info->num_inputs
             <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}

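/*
 * Shader epilogue: for geometry shaders, flush any pending primitives
 * and report the final vertex/primitive counts to the GS interface;
 * otherwise gather the output values into the caller-visible output
 * array.
 */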
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   /* If we have indirect addressing in outputs, we need to copy our alloca
    * array to the output slots specified by the caller. */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
         vertices in the cache. Note we must not call end_primitive()
         here since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      gather_outputs(bld);
   }
}

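/**
 * Translate a TGSI shader into LLVM IR, SoA style.
 *
 * Sets up the per-type build contexts, registers the fetch/store and
 * opcode handlers defined above, then walks the token stream via
 * lp_build_tgsi_llvm().
 */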
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  const struct lp_build_tgsi_params *params,
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
{
   struct lp_build_tgsi_soa_context bld;
   struct lp_type type = params->type;
   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = params->mask;
   bld.inputs = params->inputs;
   bld.outputs = outputs;
   bld.consts_ptr = params->consts_ptr;
   bld.const_sizes_ptr = params->const_sizes_ptr;
   bld.ssbo_ptr = params->ssbo_ptr;
   bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
   bld.sampler = params->sampler;
   bld.bld_base.info = params->info;
   bld.indirect_files = params->info->indirect_files;
   bld.context_ptr = params->context_ptr;
   bld.thread_data_ptr = params->thread_data_ptr;
   bld.image = params->image;
   bld.shared_ptr = params->shared_ptr;
   bld.coro = params->coro;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * them like indirect temporaries.
    */
   if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons immediates are always backed by a static
    * array, but if their number is too great, we fall back to a
    * dynamically allocated array.
    */
   bld.use_immediates_array =
      (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }

   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;

   if (params->gs_iface) {
      /* There's no default value for this because it should always be
       * set, but apps using ext_geometry_shader4 quite often forgot to
       * set it, so we use MAX_VERTEX_VARYING from that spec instead.
       * We could debug_assert if it's not set, but that's a lot
       * uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = params->gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
         params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *params->system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
   lp_exec_mask_fini(&bld.exec_mask);
}