s/Tungsten Graphics/VMware/
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "tgsi/tgsi_strings.h"
51 #include "lp_bld_tgsi_action.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_bitarit.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_init.h"
58 #include "lp_bld_logic.h"
59 #include "lp_bld_swizzle.h"
60 #include "lp_bld_flow.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_tgsi.h"
63 #include "lp_bld_limits.h"
64 #include "lp_bld_debug.h"
65 #include "lp_bld_printf.h"
66 #include "lp_bld_sample.h"
67 #include "lp_bld_struct.h"
68
69 #define DUMP_GS_EMITS 0
70
71 /*
72 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
73 * instruction.
74 *
75 * TODO:
76 * - take execution masks in consideration
77 * - debug control-flow instructions
78 */
79 #define DEBUG_EXECUTION 0
80
81
82 /*
83 * Emit code to print a register value.
84 */
85 static void
86 emit_dump_reg(struct gallivm_state *gallivm,
87 unsigned file,
88 unsigned index,
89 unsigned chan,
90 LLVMValueRef value)
91 {
92 char buf[32];
93
94 util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
95 tgsi_file_name(file),
96 index, "xyzw"[chan]);
97
98 lp_build_print_value(gallivm, buf, value);
99 }
100
101
/*
 * Initialize the execution-mask state: all channels enabled, every
 * control-flow stack (cond/loop/call/switch) empty, and a fresh
 * loop-iteration limiter counter allocated on the stack.
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMBuilderRef builder = bld->gallivm->builder;

   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   mask->cond_stack_size = 0;
   mask->loop_stack_size = 0;
   mask->call_stack_size = 0;
   mask->switch_stack_size = 0;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   /* All partial masks start as all-ones == every channel active. */
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   /* Scalar counter decremented each loop iteration to guarantee
    * termination of runaway shader loops (see lp_exec_endloop). */
   mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");

   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      mask->loop_limiter);
}
127
/*
 * Recompute exec_mask as the AND of all currently-relevant partial masks
 * (cond, loop break/cont, switch, return) and refresh the has_mask flag.
 * Must be called after any partial mask changes.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (mask->switch_stack_size) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   /* ret_mask matters inside subroutines, or in main once a RET has been
    * seen there (ret_in_main). */
   if (mask->call_stack_size || mask->ret_in_main) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   /* has_mask == FALSE means exec_mask is statically all-ones, letting
    * stores skip the masked-select path. */
   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0 ||
                     mask->switch_stack_size > 0 ||
                     mask->ret_in_main);
}
167
/*
 * Enter an IF block: push the current cond_mask and AND in the new
 * per-channel condition 'val' (an int-vector mask).
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      /* outermost IF: mask must still be the initial all-ones constant */
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}
185
/*
 * ELSE: invert the current condition relative to the mask that was
 * active when the matching IF was entered (top of the cond stack).
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   /* channels active in ELSE = enclosing mask AND NOT(IF mask) */
   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
205
/* ENDIF: restore the condition mask saved by the matching IF. */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   assert(mask->cond_stack_size);
   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
   lp_exec_mask_update(mask);
}
212
/*
 * BGNLOOP: push loop state, create the loop header basic block, and
 * branch into it.  break_mask is kept in an alloca (break_var) because
 * it must survive across loop back-edges.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size == 0) {
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* break_type stack is shared between loops and switches, hence the
    * combined index. */
   mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
      mask->break_type;
   mask->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* spill break_mask to memory so it persists across iterations */
   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(builder, mask->loop_block);

   /* reload at top of loop body: value may have been updated by a
    * previous iteration's break */
   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
248
/*
 * BRK: disable currently-active channels in either the loop break mask
 * or the switch mask, depending on what the break targets
 * (mask->break_type).
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* channels executing the break are removed from break_mask */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* Break inside a switch.  If the next instruction ends the switch
       * or starts a new case, the break is unconditional for all
       * channels that reached it. */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (mask->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && mask->switch_pc) {
            /* jump back to the instruction after ENDSWITCH (deferred
             * default execution is finished) */
            bld_base->pc = mask->switch_pc;
            return;
         }
      }

      if (break_always) {
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}
297
/*
 * Conditional break (BREAKC): disable only those active channels for
 * which 'cond' is true, in the loop or switch mask as appropriate.
 */
static void lp_exec_break_condition(struct lp_exec_mask *mask,
                                    LLVMValueRef cond)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   /* channels that are active AND satisfy the condition break out */
   LLVMValueRef cond_mask = LLVMBuildAnd(builder,
                                         mask->exec_mask,
                                         cond, "cond_mask");
   cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");

   if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      cond_mask, "breakc_full");
   }
   else {
      mask->switch_mask = LLVMBuildAnd(builder,
                                       mask->switch_mask,
                                       cond_mask, "breakc_switch");
   }

   lp_exec_mask_update(mask);
}
320
/*
 * CONT: remove currently-active channels from cont_mask; they stay
 * disabled until the end of this loop iteration (cont_mask is restored
 * in lp_exec_endloop).
 */
static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}
334
335
/*
 * ENDLOOP: emit the loop back-edge.  The loop repeats while any channel
 * is still active AND the iteration limiter has not expired; afterwards
 * the saved loop state is popped.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* one wide integer covering the whole mask vector, for a single
    * "any channel active?" compare */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, mask->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   /* back-edge to the loop header, or fall through to endloop */
   LLVMBuildCondBr(builder,
                   icond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   /* pop saved loop state */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
   mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];

   lp_exec_mask_update(mask);
}
407
/*
 * SWITCH: push current switch state and start a new one.  switch_mask
 * starts all-zero: channels only become active when a CASE matches (or
 * via DEFAULT handling).
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   /* break_type stack is shared with loops, hence the combined index */
   mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
      mask->break_type;
   mask->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   mask->switch_stack[mask->switch_stack_size].switch_val = mask->switch_val;
   mask->switch_stack[mask->switch_stack_size].switch_mask = mask->switch_mask;
   mask->switch_stack[mask->switch_stack_size].switch_mask_default = mask->switch_mask_default;
   mask->switch_stack[mask->switch_stack_size].switch_in_default = mask->switch_in_default;
   mask->switch_stack[mask->switch_stack_size].switch_pc = mask->switch_pc;
   mask->switch_stack_size++;

   mask->switch_val = switchval;
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   mask->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   mask->switch_in_default = false;
   mask->switch_pc = 0;

   lp_exec_mask_update(mask);
}
430
/*
 * ENDSWITCH: if a DEFAULT was deferred (recorded in switch_pc but not
 * yet executed), jump back and execute it now with the
 * "no case matched" mask; otherwise pop the saved switch state.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   /* check if there's deferred default if so do it now */
   if (mask->switch_pc && !mask->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
      /* default runs for channels no case matched */
      defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      mask->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[mask->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = mask->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      mask->switch_pc = tmp_pc - 1;

      return;
   }

   else if (mask->switch_pc && mask->switch_in_default) {
      /* we just came back from executing the deferred default */
      assert(bld_base->pc == mask->switch_pc + 1);
   }

   /* pop saved switch state */
   mask->switch_stack_size--;
   mask->switch_val = mask->switch_stack[mask->switch_stack_size].switch_val;
   mask->switch_mask = mask->switch_stack[mask->switch_stack_size].switch_mask;
   mask->switch_mask_default = mask->switch_stack[mask->switch_stack_size].switch_mask_default;
   mask->switch_in_default = mask->switch_stack[mask->switch_stack_size].switch_in_default;
   mask->switch_pc = mask->switch_stack[mask->switch_stack_size].switch_pc;

   mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];

   lp_exec_mask_update(mask);
}
476
/*
 * CASE: enable channels whose switch value equals 'caseval' (in
 * addition to those already active via fallthrough), and record the
 * match in switch_mask_default so DEFAULT knows which channels were
 * handled by some case.
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   LLVMValueRef casemask, prevmask;

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!mask->switch_in_default) {
      prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, mask->switch_val);
      mask->switch_mask_default = LLVMBuildOr(builder, casemask,
                                              mask->switch_mask_default, "sw_default_mask");
      /* fallthrough: channels active before this case remain active */
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
496
/*
 * Analyse default statement in a switch.
 * \return true if default is last statement, false otherwise
 * \param default_pc_start contains pc of instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   unsigned curr_switch_stack = mask->switch_stack_size;

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   /* scan forward, tracking nested switches, until we find a CASE or
    * ENDSWITCH belonging to THIS switch.
    * (Note: pc is unsigned, so "pc != -1" compares against UINT_MAX;
    * the num_instructions bound is what terminates the loop in
    * practice.) */
   while (pc != -1 && pc < bld_base->num_instructions) {
      unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == mask->switch_stack_size) {
            /* a case of this switch follows: default is not last */
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == mask->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}
542
543 static void lp_exec_default(struct lp_exec_mask *mask,
544 struct lp_build_tgsi_context * bld_base)
545 {
546 LLVMBuilderRef builder = mask->bld->gallivm->builder;
547
548 int default_exec_pc;
549 boolean default_is_last;
550
551 /*
552 * This is a messy opcode, because it may not be always at the end and
553 * there can be fallthrough in and out of it.
554 */
555
556 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
557 /*
558 * If it is last statement in switch (note that case statements appearing
559 * "at the same time" as default don't change that) everything is just fine,
560 * update switch mask and go on. This means we can handle default with
561 * fallthrough INTO it without overhead, if it is last.
562 */
563 if (default_is_last) {
564 LLVMValueRef prevmask, defaultmask;
565 prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
566 defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
567 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
568 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
569 mask->switch_in_default = true;
570
571 lp_exec_mask_update(mask);
572 }
573 else {
574 /*
575 * Technically, "case" immediately before default isn't really a
576 * fallthrough, however we still have to count them as such as we
577 * already have updated the masks.
578 * If that happens in practice could add a switch optimizer pass
579 * which just gets rid of all case statements appearing together with
580 * default (or could do switch analysis at switch start time instead).
581 */
582 unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
583 boolean ft_into = (opcode != TGSI_OPCODE_BRK ||
584 opcode != TGSI_OPCODE_SWITCH);
585 /*
586 * If it is not last statement and there was no fallthrough into it,
587 * we record the PC and continue execution at next case (again, those
588 * case encountered at the same time don't count). At endswitch
589 * time, we update switchmask, and go back executing the code we skipped
590 * until the next break (possibly re-executing some code with changed mask
591 * if there was a fallthrough out of default).
592 * Finally, if it is not last statement and there was a fallthrough into it,
593 * do the same as with the former case, except instead of skipping the code
594 * just execute it without updating the mask, then go back and re-execute.
595 */
596 mask->switch_pc = bld_base->pc;
597 if (!ft_into) {
598 bld_base->pc = default_exec_pc;
599 }
600 }
601 }
602
603
/* stores val into an address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      /* masked store: read-modify-write via per-channel select */
      LLVMValueRef res, dst;

      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, pred, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      /* no mask active: plain full-vector store */
      LLVMBuildStore(builder, val, dst_ptr);
}
639
/*
 * CAL: push the return address and current ret_mask, then transfer
 * control to the subroutine by rewriting *pc.
 */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
   mask->call_stack[mask->call_stack_size].pc = *pc;
   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
   mask->call_stack_size++;
   *pc = func;
}
650
/*
 * RET: a return from main with no control flow active ends translation
 * (*pc = -1); otherwise the currently-active channels are removed from
 * ret_mask so they stay dead until the subroutine (or main) ends.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask;

   if (mask->cond_stack_size == 0 &&
       mask->loop_stack_size == 0 &&
       mask->switch_stack_size == 0 &&
       mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->call_stack_size == 0) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
684
/* BGNSUB: no mask state changes needed at subroutine entry (the call
 * already saved ret_mask in lp_exec_mask_call). */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
688
/*
 * ENDSUB: pop the call stack, restoring the caller's PC and ret_mask.
 */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   assert(mask->call_stack_size);
   mask->call_stack_size--;
   *pc = mask->call_stack[mask->call_stack_size].pc;
   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
   lp_exec_mask_update(mask);
}
697
/*
 * Return a pointer to the storage of a TEMPORARY or OUTPUT register
 * channel.  For files accessed indirectly the registers live in one
 * flat alloca array (indexed as index*4 + chan via GEP); otherwise each
 * channel has its own scalar alloca.
 */
static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             unsigned index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      /* flat array layout: 4 channels per register */
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}
734
735
/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}
749
/**
 * Return pointer to a output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}
763
/*
 * If we have indirect addressing in outputs copy our alloca array
 * to the outputs slots specified by the caller to make sure
 * our outputs are delivered consistently via the same interface.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
      unsigned index, chan;
      assert(bld->bld_base.info->num_outputs <=
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            /* publish the flat-array slot pointer in the per-register view */
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}
783
/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 *
 * Loads one scalar per channel from base_ptr[indexes[i]] and assembles
 * them into a vector.  If overflow_mask is non-NULL, channels flagged
 * as overflowed read 0.0 instead of dereferencing the (possibly bogus)
 * index; in that case the result is accumulated through a temporary
 * alloca because the insertions happen inside if/else blocks.
 */
static LLVMValueRef
build_gather(struct lp_build_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef *overflow_mask)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef res = bld->undef;
   unsigned i;
   LLVMValueRef temp_ptr;

   if (overflow_mask) {
      /* memory accumulator for the conditional (if/else) insert path */
      temp_ptr = lp_build_alloca(
         bld->gallivm,
         lp_build_vec_type(bld->gallivm, bld->type), "");
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr, scalar;
      LLVMValueRef overflow;
      struct lp_build_if_state if_ctx;

      /*
       * overflow_mask is a boolean vector telling us which channels
       * in the vector overflowed. We use the overflow behavior for
       * constant buffers which is defined as:
       * Out of bounds access to constant buffer returns 0 in all
       * componenets. Out of bounds behavior is always with respect
       * to the size of the buffer bound at that slot.
       */
      if (overflow_mask) {
         overflow = LLVMBuildExtractElement(builder, *overflow_mask,
                                            ii, "");
         lp_build_if(&if_ctx, bld->gallivm, overflow);
         {
            /* overflowed: channel reads 0.0 */
            LLVMValueRef val = LLVMBuildLoad(builder, temp_ptr, "");
            val = LLVMBuildInsertElement(
               builder, val,
               LLVMConstNull(LLVMFloatTypeInContext(bld->gallivm->context)),
               ii, "");
            LLVMBuildStore(builder, val, temp_ptr);
         }
         lp_build_else(&if_ctx);
         {
            /* in range: load the scalar and insert it */
            LLVMValueRef val = LLVMBuildLoad(builder, temp_ptr, "");

            scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                      &index, 1, "gather_ptr");
            scalar = LLVMBuildLoad(builder, scalar_ptr, "");

            val = LLVMBuildInsertElement(builder, val, scalar, ii, "");

            LLVMBuildStore(builder, val, temp_ptr);
         }
         lp_build_endif(&if_ctx);
      } else {
         /* no overflow checking: straight-line load + insert */
         scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                   &index, 1, "gather_ptr");
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");

         res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
      }
   }

   if (overflow_mask) {
      /* result was built up in memory; load the final vector */
      res = LLVMBuildLoad(builder, temp_ptr, "gather_val");
   }

   return res;
}
865
866
/**
 * Scatter/store vector.
 *
 * Stores each element of 'values' to base_ptr[indexes[i]], honoring the
 * combined predicate/execution mask per element (masked-off elements
 * keep the destination's previous value via a read-modify-write select).
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         /* debug aid, normally compiled out */
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* masked element: select between new value and existing one */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
918
919
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      /* clamp to declared file size to avoid out-of-bounds access */
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type,
                                         bld->bld_base.info->file_max[reg_file]);

      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}
986
987 static struct lp_build_context *
988 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
989 enum tgsi_opcode_type stype)
990 {
991 struct lp_build_context *bld_fetch;
992
993 switch (stype) {
994 case TGSI_TYPE_FLOAT:
995 case TGSI_TYPE_UNTYPED:
996 bld_fetch = &bld_base->base;
997 break;
998 case TGSI_TYPE_UNSIGNED:
999 bld_fetch = &bld_base->uint_bld;
1000 break;
1001 case TGSI_TYPE_SIGNED:
1002 bld_fetch = &bld_base->int_bld;
1003 break;
1004 case TGSI_TYPE_VOID:
1005 case TGSI_TYPE_DOUBLE:
1006 default:
1007 assert(0);
1008 bld_fetch = NULL;
1009 break;
1010 }
1011 return bld_fetch;
1012 }
1013
1014 static LLVMValueRef
1015 get_soa_array_offsets(struct lp_build_context *uint_bld,
1016 LLVMValueRef indirect_index,
1017 unsigned chan_index,
1018 boolean need_perelement_offset)
1019 {
1020 struct gallivm_state *gallivm = uint_bld->gallivm;
1021 LLVMValueRef chan_vec =
1022 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
1023 LLVMValueRef length_vec =
1024 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
1025 LLVMValueRef index_vec;
1026
1027 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1028 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1029 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1030 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1031
1032 if (need_perelement_offset) {
1033 LLVMValueRef pixel_offsets;
1034 int i;
1035 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1036 pixel_offsets = uint_bld->undef;
1037 for (i = 0; i < uint_bld->type.length; i++) {
1038 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1039 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
1040 ii, ii, "");
1041 }
1042 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1043 }
1044 return index_vec;
1045 }
1046
/**
 * Fetch one channel of a constant-buffer register as a SoA vector.
 *
 * Direct accesses load a single scalar from the buffer and broadcast it
 * across the vector; indirect accesses gather one value per lane and use
 * an overflow mask so lanes indexing beyond the bound buffer size are
 * handled here (hence get_indirect_index's clamp is not needed for
 * constants).  The result is reinterpreted to the requested stype.
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;            /* constant buffer slot (2D index) */
   LLVMValueRef dimension_index;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0);

   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   /* Look up the buffer pointer and its element count for this slot. */
   dimension_index = lp_build_const_int32(gallivm, dimension);
   consts_ptr =
      lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);
   num_consts =
      lp_build_array_get(gallivm, bld->const_sizes_ptr, dimension_index);

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = LLVMBuildICmp(builder, LLVMIntUGE,
                                    indirect_index,
                                    num_consts, "");

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      /* Gather values from the constant buffer */
      res = build_gather(&bld_base->base, consts_ptr, index_vec,
                         &overflow_mask);
   }
   else {
      /* Direct access: load one scalar and splat it across the vector. */
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;

      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(&bld_base->base, scalar);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
      /* Constants are stored as floats; reinterpret the bits for int ops. */
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1128
1129 static LLVMValueRef
1130 emit_fetch_immediate(
1131 struct lp_build_tgsi_context * bld_base,
1132 const struct tgsi_full_src_register * reg,
1133 enum tgsi_opcode_type stype,
1134 unsigned swizzle)
1135 {
1136 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1137 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1138 LLVMBuilderRef builder = gallivm->builder;
1139 LLVMValueRef res = NULL;
1140
1141 if (reg->Register.Indirect) {
1142 LLVMValueRef indirect_index;
1143 LLVMValueRef index_vec; /* index into the immediate register array */
1144 LLVMValueRef imms_array;
1145 LLVMTypeRef fptr_type;
1146
1147 indirect_index = get_indirect_index(bld,
1148 reg->Register.File,
1149 reg->Register.Index,
1150 &reg->Indirect);
1151 /*
1152 * Unlike for other reg classes, adding pixel offsets is unnecessary -
1153 * immediates are stored as full vectors (FIXME??? - might be better
1154 * to store them the same as constants) but all elements are the same
1155 * in any case.
1156 */
1157 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1158 indirect_index,
1159 swizzle,
1160 FALSE);
1161
1162 /* cast imms_array pointer to float* */
1163 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1164 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
1165
1166 /* Gather values from the immediate register array */
1167 res = build_gather(&bld_base->base, imms_array, index_vec, NULL);
1168 }
1169 else {
1170 res = bld->immediates[reg->Register.Index][swizzle];
1171 }
1172
1173 if (stype == TGSI_TYPE_UNSIGNED) {
1174 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1175 } else if (stype == TGSI_TYPE_SIGNED) {
1176 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1177 }
1178 return res;
1179 }
1180
1181 static LLVMValueRef
1182 emit_fetch_input(
1183 struct lp_build_tgsi_context * bld_base,
1184 const struct tgsi_full_src_register * reg,
1185 enum tgsi_opcode_type stype,
1186 unsigned swizzle)
1187 {
1188 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1189 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1190 LLVMBuilderRef builder = gallivm->builder;
1191 LLVMValueRef res;
1192
1193 if (reg->Register.Indirect) {
1194 LLVMValueRef indirect_index;
1195 LLVMValueRef index_vec; /* index into the input reg array */
1196 LLVMValueRef inputs_array;
1197 LLVMTypeRef fptr_type;
1198
1199 indirect_index = get_indirect_index(bld,
1200 reg->Register.File,
1201 reg->Register.Index,
1202 &reg->Indirect);
1203
1204 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1205 indirect_index,
1206 swizzle,
1207 TRUE);
1208
1209 /* cast inputs_array pointer to float* */
1210 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1211 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1212
1213 /* Gather values from the input register array */
1214 res = build_gather(&bld_base->base, inputs_array, index_vec, NULL);
1215 } else {
1216 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1217 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1218 reg->Register.Index * 4 + swizzle);
1219 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1220 bld->inputs_array, &lindex, 1, "");
1221 res = LLVMBuildLoad(builder, input_ptr, "");
1222 }
1223 else {
1224 res = bld->inputs[reg->Register.Index][swizzle];
1225 }
1226 }
1227
1228 assert(res);
1229
1230 if (stype == TGSI_TYPE_UNSIGNED) {
1231 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1232 } else if (stype == TGSI_TYPE_SIGNED) {
1233 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1234 }
1235
1236 return res;
1237 }
1238
1239
1240 static LLVMValueRef
1241 emit_fetch_gs_input(
1242 struct lp_build_tgsi_context * bld_base,
1243 const struct tgsi_full_src_register * reg,
1244 enum tgsi_opcode_type stype,
1245 unsigned swizzle)
1246 {
1247 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1248 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1249 LLVMBuilderRef builder = gallivm->builder;
1250 LLVMValueRef attrib_index = NULL;
1251 LLVMValueRef vertex_index = NULL;
1252 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1253 LLVMValueRef res;
1254
1255 if (reg->Register.Indirect) {
1256 attrib_index = get_indirect_index(bld,
1257 reg->Register.File,
1258 reg->Register.Index,
1259 &reg->Indirect);
1260 } else {
1261 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1262 }
1263
1264 if (reg->Dimension.Indirect) {
1265 vertex_index = get_indirect_index(bld,
1266 reg->Register.File,
1267 reg->Dimension.Index,
1268 &reg->DimIndirect);
1269 } else {
1270 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1271 }
1272
1273 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1274 reg->Dimension.Indirect,
1275 vertex_index,
1276 reg->Register.Indirect,
1277 attrib_index,
1278 swizzle_index);
1279
1280 assert(res);
1281
1282 if (stype == TGSI_TYPE_UNSIGNED) {
1283 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1284 } else if (stype == TGSI_TYPE_SIGNED) {
1285 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1286 }
1287
1288 return res;
1289 }
1290
1291 static LLVMValueRef
1292 emit_fetch_temporary(
1293 struct lp_build_tgsi_context * bld_base,
1294 const struct tgsi_full_src_register * reg,
1295 enum tgsi_opcode_type stype,
1296 unsigned swizzle)
1297 {
1298 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1299 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1300 LLVMBuilderRef builder = gallivm->builder;
1301 LLVMValueRef res;
1302
1303 if (reg->Register.Indirect) {
1304 LLVMValueRef indirect_index;
1305 LLVMValueRef index_vec; /* index into the temp reg array */
1306 LLVMValueRef temps_array;
1307 LLVMTypeRef fptr_type;
1308
1309 indirect_index = get_indirect_index(bld,
1310 reg->Register.File,
1311 reg->Register.Index,
1312 &reg->Indirect);
1313
1314 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1315 indirect_index,
1316 swizzle,
1317 TRUE);
1318
1319 /* cast temps_array pointer to float* */
1320 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1321 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1322
1323 /* Gather values from the temporary register array */
1324 res = build_gather(&bld_base->base, temps_array, index_vec, NULL);
1325 }
1326 else {
1327 LLVMValueRef temp_ptr;
1328 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1329 res = LLVMBuildLoad(builder, temp_ptr, "");
1330 }
1331
1332 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
1333 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1334 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1335 }
1336
1337 return res;
1338 }
1339
1340 static LLVMValueRef
1341 emit_fetch_system_value(
1342 struct lp_build_tgsi_context * bld_base,
1343 const struct tgsi_full_src_register * reg,
1344 enum tgsi_opcode_type stype,
1345 unsigned swizzle)
1346 {
1347 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1348 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1349 const struct tgsi_shader_info *info = bld->bld_base.info;
1350 LLVMBuilderRef builder = gallivm->builder;
1351 LLVMValueRef res;
1352 enum tgsi_opcode_type atype; // Actual type of the value
1353
1354 assert(!reg->Register.Indirect);
1355
1356 switch (info->system_value_semantic_name[reg->Register.Index]) {
1357 case TGSI_SEMANTIC_INSTANCEID:
1358 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1359 atype = TGSI_TYPE_UNSIGNED;
1360 break;
1361
1362 case TGSI_SEMANTIC_VERTEXID:
1363 res = bld->system_values.vertex_id;
1364 atype = TGSI_TYPE_UNSIGNED;
1365 break;
1366
1367 case TGSI_SEMANTIC_PRIMID:
1368 res = bld->system_values.prim_id;
1369 atype = TGSI_TYPE_UNSIGNED;
1370 break;
1371
1372 default:
1373 assert(!"unexpected semantic in emit_fetch_system_value");
1374 res = bld_base->base.zero;
1375 atype = TGSI_TYPE_FLOAT;
1376 break;
1377 }
1378
1379 if (atype != stype) {
1380 if (stype == TGSI_TYPE_FLOAT) {
1381 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1382 } else if (stype == TGSI_TYPE_UNSIGNED) {
1383 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1384 } else if (stype == TGSI_TYPE_SIGNED) {
1385 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1386 }
1387 }
1388
1389 return res;
1390 }
1391
1392 /**
1393 * Register fetch with derivatives.
1394 */
1395 static void
1396 emit_fetch_deriv(
1397 struct lp_build_tgsi_soa_context *bld,
1398 LLVMValueRef src,
1399 LLVMValueRef *res,
1400 LLVMValueRef *ddx,
1401 LLVMValueRef *ddy)
1402 {
1403 if(res)
1404 *res = src;
1405
1406 /* TODO: use interpolation coeffs for inputs */
1407
1408 if(ddx)
1409 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1410
1411 if(ddy)
1412 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1413 }
1414
1415
/**
 * Predicate.
 *
 * Fetch the per-channel predicate masks for an instruction.  For
 * unpredicated instructions all four entries of \p pred are set to NULL.
 * Predicate register values are stored as floats; each fetched channel is
 * converted here to an integer mask (value != 0), optionally negated.
 * Channels repeated by the swizzle are converted only once and cached.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};  /* per-channel cache */
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* Unpredicated instruction: no masks. */
      TGSI_FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   TGSI_FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->bld_base.base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
1482
1483
/**
 * Register store.
 *
 * Store one channel of an instruction result into the destination
 * register selected by Dst[index], applying saturation (always done in
 * float), predication and the current execution mask.  Indirectly
 * addressed destinations are scattered through the corresponding
 * memory-backed register array.
 */
static void
emit_store_chan(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef pred,
   LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *float_bld = &bld_base->base;
   struct lp_build_context *int_bld = &bld_base->int_bld;
   LLVMValueRef indirect_index = NULL;
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);

   /*
    * Apply saturation.
    *
    * It is always assumed to be float.
    */
   switch( inst->Instruction.Saturate ) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      /* clamps to [0,1], mapping NaN to zero */
      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      /* This will give -1.0 for NaN which is probably not what we want. */
      value = lp_build_max_ext(float_bld, value,
                               lp_build_const_vec(gallivm, float_bld->type, -1.0),
                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
      value = lp_build_min(float_bld, value, float_bld->one);
      break;

   default:
      assert(0);
   }

   if (reg->Register.Indirect) {
      /* Per-lane register index (clamped by get_indirect_index). */
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <=
                             bld_base->info->file_max[reg->Register.File]);
   }

   if (DEBUG_EXECUTION) {
      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      /* Outputs are always stored as floats */
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* indexes into the output registers */
         LLVMValueRef outputs_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");

         /* Scatter store values into output registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                  chan_index);
         lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      /* Temporaries are always stored as floats */
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
         lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      assert(dtype == TGSI_TYPE_SIGNED);
      assert(LLVMTypeOf(value) == int_bld->vec_type);
      /* no-op given the assert above; kept as a safety net */
      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   case TGSI_FILE_PREDICATE:
      assert(LLVMTypeOf(value) == float_bld->vec_type);
      /* no-op given the assert above; kept as a safety net */
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
                         bld->preds[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }

   (void)dtype;
}
1628
1629 /*
1630 * Called at the beginning of the translation of each TGSI instruction, to
1631 * emit some debug code.
1632 */
1633 static void
1634 emit_debug(
1635 struct lp_build_tgsi_context * bld_base,
1636 const struct tgsi_full_instruction * inst,
1637 const struct tgsi_opcode_info * info)
1638
1639 {
1640 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1641
1642 if (DEBUG_EXECUTION) {
1643 /*
1644 * Dump the TGSI instruction.
1645 */
1646
1647 struct gallivm_state *gallivm = bld_base->base.gallivm;
1648 char buf[512];
1649 buf[0] = '$';
1650 buf[1] = ' ';
1651 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1652 lp_build_printf(gallivm, buf);
1653
1654 /* Dump the execution mask.
1655 */
1656 if (bld->exec_mask.has_mask) {
1657 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1658 }
1659 }
1660 }
1661
1662 static void
1663 emit_store(
1664 struct lp_build_tgsi_context * bld_base,
1665 const struct tgsi_full_instruction * inst,
1666 const struct tgsi_opcode_info * info,
1667 LLVMValueRef dst[4])
1668
1669 {
1670 unsigned chan_index;
1671 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1672
1673 if(info->num_dst) {
1674 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1675
1676 emit_fetch_predicate( bld, inst, pred );
1677
1678 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1679 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1680 }
1681 }
1682 }
1683
1684 static unsigned
1685 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1686 {
1687 switch (tgsi_target) {
1688 case TGSI_TEXTURE_BUFFER:
1689 return PIPE_BUFFER;
1690 case TGSI_TEXTURE_1D:
1691 case TGSI_TEXTURE_SHADOW1D:
1692 return PIPE_TEXTURE_1D;
1693 case TGSI_TEXTURE_2D:
1694 case TGSI_TEXTURE_SHADOW2D:
1695 case TGSI_TEXTURE_2D_MSAA:
1696 return PIPE_TEXTURE_2D;
1697 case TGSI_TEXTURE_3D:
1698 return PIPE_TEXTURE_3D;
1699 case TGSI_TEXTURE_CUBE:
1700 case TGSI_TEXTURE_SHADOWCUBE:
1701 return PIPE_TEXTURE_CUBE;
1702 case TGSI_TEXTURE_RECT:
1703 case TGSI_TEXTURE_SHADOWRECT:
1704 return PIPE_TEXTURE_RECT;
1705 case TGSI_TEXTURE_1D_ARRAY:
1706 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1707 return PIPE_TEXTURE_1D_ARRAY;
1708 case TGSI_TEXTURE_2D_ARRAY:
1709 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1710 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1711 return PIPE_TEXTURE_2D_ARRAY;
1712 case TGSI_TEXTURE_CUBE_ARRAY:
1713 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1714 return PIPE_TEXTURE_CUBE_ARRAY;
1715 default:
1716 assert(0);
1717 return PIPE_BUFFER;
1718 }
1719 }
1720
1721
1722 static enum lp_sampler_lod_property
1723 lp_build_lod_property(
1724 struct lp_build_tgsi_context *bld_base,
1725 const struct tgsi_full_instruction *inst,
1726 unsigned src_op)
1727 {
1728 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
1729 enum lp_sampler_lod_property lod_property;
1730
1731 /*
1732 * Not much we can do here. We could try catching inputs declared
1733 * with constant interpolation but not sure it's worth it - since for
1734 * TEX opcodes as well as FETCH/LD the lod comes from same reg as
1735 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
1736 * like the constant/immediate recognition below.
1737 * What seems to be of more value would be to recognize temps holding
1738 * broadcasted scalars but no way we can do it.
1739 * Tried asking llvm but without any success (using LLVMIsConstant
1740 * even though this isn't exactly what we'd need), even as simple as
1741 * IMM[0] UINT32 (0,-1,0,0)
1742 * MOV TEMP[0] IMM[0].yyyy
1743 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
1744 * doesn't work.
1745 * This means there's ZERO chance this will ever catch a scalar lod
1746 * with traditional tex opcodes as well as texel fetches, since the lod
1747 * comes from the same reg as coords (except some test shaders using
1748 * constant coords maybe).
1749 * There's at least hope for sample opcodes as well as size queries.
1750 */
1751 if (reg->Register.File == TGSI_FILE_CONSTANT ||
1752 reg->Register.File == TGSI_FILE_IMMEDIATE) {
1753 lod_property = LP_SAMPLER_LOD_SCALAR;
1754 }
1755 else if (bld_base->info->processor == TGSI_PROCESSOR_FRAGMENT) {
1756 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
1757 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1758 }
1759 else {
1760 lod_property = LP_SAMPLER_LOD_PER_QUAD;
1761 }
1762 }
1763 else {
1764 /* never use scalar (per-quad) lod the results are just too wrong. */
1765 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1766 }
1767 return lod_property;
1768 }
1769
1770
1771 /**
1772 * High-level instruction translators.
1773 */
1774
1775 static void
1776 emit_tex( struct lp_build_tgsi_soa_context *bld,
1777 const struct tgsi_full_instruction *inst,
1778 enum lp_build_tex_modifier modifier,
1779 LLVMValueRef *texel)
1780 {
1781 unsigned unit;
1782 LLVMValueRef lod_bias, explicit_lod;
1783 LLVMValueRef oow = NULL;
1784 LLVMValueRef coords[5];
1785 LLVMValueRef offsets[3] = { NULL };
1786 struct lp_derivatives derivs;
1787 struct lp_derivatives *deriv_ptr = NULL;
1788 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
1789 unsigned num_derivs, num_offsets, i;
1790 unsigned shadow_coord = 0;
1791 unsigned layer_coord = 0;
1792
1793 if (!bld->sampler) {
1794 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1795 for (i = 0; i < 4; i++) {
1796 texel[i] = bld->bld_base.base.undef;
1797 }
1798 return;
1799 }
1800
1801 switch (inst->Texture.Texture) {
1802 case TGSI_TEXTURE_1D_ARRAY:
1803 layer_coord = 1;
1804 /* fallthrough */
1805 case TGSI_TEXTURE_1D:
1806 num_offsets = 1;
1807 num_derivs = 1;
1808 break;
1809 case TGSI_TEXTURE_2D_ARRAY:
1810 layer_coord = 2;
1811 /* fallthrough */
1812 case TGSI_TEXTURE_2D:
1813 case TGSI_TEXTURE_RECT:
1814 num_offsets = 2;
1815 num_derivs = 2;
1816 break;
1817 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1818 layer_coord = 1;
1819 /* fallthrough */
1820 case TGSI_TEXTURE_SHADOW1D:
1821 shadow_coord = 2;
1822 num_offsets = 1;
1823 num_derivs = 1;
1824 break;
1825 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1826 layer_coord = 2;
1827 shadow_coord = 3;
1828 num_offsets = 2;
1829 num_derivs = 2;
1830 break;
1831 case TGSI_TEXTURE_SHADOW2D:
1832 case TGSI_TEXTURE_SHADOWRECT:
1833 shadow_coord = 2;
1834 num_offsets = 2;
1835 num_derivs = 2;
1836 break;
1837 case TGSI_TEXTURE_CUBE:
1838 num_offsets = 2;
1839 num_derivs = 3;
1840 break;
1841 case TGSI_TEXTURE_3D:
1842 num_offsets = 3;
1843 num_derivs = 3;
1844 break;
1845 case TGSI_TEXTURE_SHADOWCUBE:
1846 shadow_coord = 3;
1847 num_offsets = 2;
1848 num_derivs = 3;
1849 break;
1850 case TGSI_TEXTURE_CUBE_ARRAY:
1851 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1852 case TGSI_TEXTURE_2D_MSAA:
1853 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1854 default:
1855 assert(0);
1856 return;
1857 }
1858
1859 /* Note lod and especially projected are illegal in a LOT of cases */
1860 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
1861 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1862 LLVMValueRef lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
1863 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1864 lod_bias = lod;
1865 explicit_lod = NULL;
1866 }
1867 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1868 lod_bias = NULL;
1869 explicit_lod = lod;
1870 }
1871 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
1872 }
1873 else {
1874 lod_bias = NULL;
1875 explicit_lod = NULL;
1876 }
1877
1878 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1879 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
1880 oow = lp_build_rcp(&bld->bld_base.base, oow);
1881 }
1882
1883 for (i = 0; i < num_derivs; i++) {
1884 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
1885 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1886 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1887 }
1888 for (i = num_derivs; i < 5; i++) {
1889 coords[i] = bld->bld_base.base.undef;
1890 }
1891
1892 /* Layer coord always goes into 3rd slot, except for cube map arrays */
1893 if (layer_coord) {
1894 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
1895 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1896 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
1897 }
1898 /* Shadow coord occupies always 5th slot. */
1899 if (shadow_coord) {
1900 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
1901 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1902 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
1903 }
1904
1905 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1906 unsigned dim;
1907 for (dim = 0; dim < num_derivs; ++dim) {
1908 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
1909 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
1910 }
1911 deriv_ptr = &derivs;
1912 unit = inst->Src[3].Register.Index;
1913 /*
1914 * could also check all src regs if constant but I doubt such
1915 * cases exist in practice.
1916 */
1917 if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
1918 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
1919 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1920 }
1921 else {
1922 lod_property = LP_SAMPLER_LOD_PER_QUAD;
1923 }
1924 }
1925 else {
1926 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1927 }
1928 } else {
1929 unit = inst->Src[1].Register.Index;
1930 }
1931
1932 /* some advanced gather instructions (txgo) would require 4 offsets */
1933 if (inst->Texture.NumOffsets == 1) {
1934 unsigned dim;
1935 for (dim = 0; dim < num_offsets; dim++) {
1936 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
1937 }
1938 }
1939
1940 bld->sampler->emit_fetch_texel(bld->sampler,
1941 bld->bld_base.base.gallivm,
1942 bld->bld_base.base.type,
1943 FALSE,
1944 unit, unit,
1945 coords,
1946 offsets,
1947 deriv_ptr,
1948 lod_bias, explicit_lod, lod_property,
1949 texel);
1950 }
1951
/**
 * Emit a texel fetch for the SM4-style SAMPLE* opcodes.
 *
 * Unlike the classic TEX opcodes, the texture and sampler units are
 * selected by src1 and src2, and the texture target comes from the
 * declared sampler view, not from the instruction token itself.
 *
 * \param modifier  none / lod bias / explicit lod / lod zero / explicit derivs
 * \param compare   TRUE for shadow comparison (SAMPLE_C*); the reference
 *                  value is then fetched from src3
 * \param texel     output: one vector per channel (4 entries)
 */
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_derivatives *deriv_ptr = NULL;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   unsigned num_offsets, num_derivs, i;
   unsigned layer_coord = 0;

   /* Without a sampler generator we can only return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * unlike old-style tex opcodes the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets,
    * however the target information is NOT there and comes from the
    * declared sampler views instead.
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      layer_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* LOD bias / explicit LOD are both fetched from src3.x. */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      LLVMValueRef lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         lod_bias = lod;
         explicit_lod = NULL;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         lod_bias = NULL;
         explicit_lod = lod;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      lod_bias = NULL;
      /* XXX might be better to explicitly pass the level zero information */
      explicit_lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* Fetch one coord per dimension from src0; fill the rest with undef. */
   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3)
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      else
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   }
   /* Shadow coord occupies always 5th slot. */
   if (compare) {
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
   }

   /* Explicit derivatives come from src3 (ddx) and src4 (ddy). */
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
      }
      deriv_ptr = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   /* is_fetch == FALSE: a real filtered sample, not a raw texel fetch. */
   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  FALSE,
                                  texture_unit, sampler_unit,
                                  coords,
                                  offsets,
                                  deriv_ptr,
                                  lod_bias, explicit_lod, lod_property,
                                  texel);

   /* Apply the sampler-view swizzle from src1, if not identity. */
   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
2127
/**
 * Emit an unfiltered texel fetch (TXF / SAMPLE_I).
 *
 * Coordinates are integer texel addresses from src0; for non-buffer
 * targets the explicit LOD comes from src0.w.
 *
 * \param is_samplei  TRUE for SAMPLE_I: unit comes from a declared sampler
 *                    view (whose swizzle is also applied); FALSE for TXF,
 *                    where the target is taken from the instruction token.
 */
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef offsets[3] = { NULL };
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;

   /* Without a sampler generator we can only return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers ? */
   if (target != TGSI_TEXTURE_BUFFER) {
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   /* Integer coords from src0; unused slots stay undef. */
   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   for (i = dims; i < 3; i++) {
      coords[i] = coord_undef;
   }
   /* Array layer goes into the 3rd slot. */
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   /* is_fetch == TRUE: raw texel fetch, sampler state is irrelevant
    * so texture and sampler unit are both 'unit'. */
   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  TRUE,
                                  unit, unit,
                                  coords,
                                  offsets,
                                  NULL,
                                  NULL, explicit_lod, lod_property,
                                  texel);

   /* Only SAMPLE_I has a sampler view with a swizzle to apply. */
   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
2232
/**
 * Emit a texture size query (TXQ / SVIEWINFO).
 *
 * \param sizes_out      output: per-channel size vectors (int type)
 * \param is_sviewinfo   TRUE for SVIEWINFO: target comes from the declared
 *                       sampler view; FALSE for TXQ (instruction token).
 */
static void
emit_size_query( struct lp_build_tgsi_soa_context *bld,
                 const struct tgsi_full_instruction *inst,
                 LLVMValueRef *sizes_out,
                 boolean is_sviewinfo)
{
   LLVMValueRef explicit_lod;
   enum lp_sampler_lod_property lod_property;
   unsigned has_lod;
   unsigned i;
   unsigned unit = inst->Src[1].Register.Index;
   unsigned target, pipe_target;

   if (is_sviewinfo) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }
   /* Buffers and rect textures have no mip levels, hence no LOD arg. */
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      has_lod = 0;
      break;
   default:
      has_lod = 1;
      break;
   }

   /* Without a sampler generator we can only return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++)
         sizes_out[i] = bld->bld_base.int_bld.undef;
      return;
   }

   if (has_lod) {
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else {
      explicit_lod = NULL;
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }


   pipe_target = tgsi_to_pipe_tex_target(target);

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 bld->bld_base.int_bld.type,
                                 unit, pipe_target,
                                 is_sviewinfo,
                                 lod_property,
                                 explicit_lod,
                                 sizes_out);
}
2291
2292 static boolean
2293 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2294 int pc)
2295 {
2296 int i;
2297
2298 for (i = 0; i < 5; i++) {
2299 unsigned opcode;
2300
2301 if (pc + i >= bld->bld_base.info->num_instructions)
2302 return TRUE;
2303
2304 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2305
2306 if (opcode == TGSI_OPCODE_END)
2307 return TRUE;
2308
2309 if (opcode == TGSI_OPCODE_TEX ||
2310 opcode == TGSI_OPCODE_TXP ||
2311 opcode == TGSI_OPCODE_TXD ||
2312 opcode == TGSI_OPCODE_TXB ||
2313 opcode == TGSI_OPCODE_TXL ||
2314 opcode == TGSI_OPCODE_TXF ||
2315 opcode == TGSI_OPCODE_TXQ ||
2316 opcode == TGSI_OPCODE_CAL ||
2317 opcode == TGSI_OPCODE_CALLNZ ||
2318 opcode == TGSI_OPCODE_IF ||
2319 opcode == TGSI_OPCODE_UIF ||
2320 opcode == TGSI_OPCODE_BGNLOOP ||
2321 opcode == TGSI_OPCODE_SWITCH)
2322 return FALSE;
2323 }
2324
2325 return TRUE;
2326 }
2327
2328
2329
/**
 * Conditional fragment kill (KILL_IF).
 *
 * Kill fragment if any of the src register values are negative.
 * Swizzled channels are de-duplicated so each source component is
 * fetched and tested at most once.
 */
static void
emit_kill_if(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[TGSI_NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < TGSI_NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   }

   /* AND together the per-channel "survives" masks. */
   mask = NULL;
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);

         if(mask)
            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   /* Lanes already inactive (loop/cond mask) must not be killed. */
   if (bld->exec_mask.has_mask) {
      LLVMValueRef invmask;
      invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
      mask = LLVMBuildOr(builder, mask, invmask, "");
   }

   /* Only emit the early-out branch if enough work remains for it to pay. */
   lp_build_mask_update(bld->mask, mask);
   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}
2387
2388
2389 /**
2390 * Unconditional fragment kill.
2391 * The only predication is the execution mask which will apply if
2392 * we're inside a loop or conditional.
2393 */
2394 static void
2395 emit_kill(struct lp_build_tgsi_soa_context *bld,
2396 int pc)
2397 {
2398 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2399 LLVMValueRef mask;
2400
2401 /* For those channels which are "alive", disable fragment shader
2402 * execution.
2403 */
2404 if (bld->exec_mask.has_mask) {
2405 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2406 }
2407 else {
2408 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2409 mask = zero;
2410 }
2411
2412 lp_build_mask_update(bld->mask, mask);
2413
2414 if (!near_end_of_shader(bld, pc))
2415 lp_build_mask_check(bld->mask);
2416 }
2417
2418
/**
 * Emit code which will dump the value of all the temporary registers
 * to stdout.
 *
 * Supports CONSTANT, INPUT, TEMPORARY and OUTPUT files; registers that
 * were never declared (or channels never used, for inputs) are skipped.
 */
static void
emit_dump_file(struct lp_build_tgsi_soa_context *bld,
               unsigned file)
{
   const struct tgsi_shader_info *info = bld->bld_base.info;
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef reg_ptr;
   int index;
   int max_index = info->file_max[file];

   /*
    * Some register files, particularly constants, can be very large,
    * and dumping everything could make this unusably slow.
    */
   max_index = MIN2(max_index, 32);

   for (index = 0; index <= max_index; index++) {
      LLVMValueRef res;
      unsigned mask;
      int chan;

      /* file_mask only tracks the first 32 registers. */
      if (index < 8 * sizeof(unsigned) &&
          (info->file_mask[file] & (1 << index)) == 0) {
         /* This was not declared.*/
         continue;
      }

      if (file == TGSI_FILE_INPUT) {
         mask = info->input_usage_mask[index];
      } else {
         mask = TGSI_WRITEMASK_XYZW;
      }

      for (chan = 0; chan < 4; chan++) {
         if ((mask & (1 << chan)) == 0) {
            /* This channel is not used.*/
            continue;
         }

         if (file == TGSI_FILE_CONSTANT) {
            /* Constants are fetched through the generic fetch path,
             * so build a synthetic source register for them. */
            struct tgsi_full_src_register reg;
            memset(&reg, 0, sizeof reg);
            reg.Register.File = file;
            reg.Register.Index = index;
            reg.Register.SwizzleX = 0;
            reg.Register.SwizzleY = 1;
            reg.Register.SwizzleZ = 2;
            reg.Register.SwizzleW = 3;

            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_INPUT) {
            res = bld->inputs[index][chan];
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_TEMPORARY) {
            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else if (file == TGSI_FILE_OUTPUT) {
            reg_ptr = lp_get_output_ptr(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else {
            assert(0);
            continue;
         }

         emit_dump_reg(gallivm, file, index, chan, res);
      }
   }
}
2499
2500
2501
/**
 * Process a TGSI declaration: allocate storage for the declared register
 * range, or record sampler view targets.
 *
 * Temporaries and outputs get per-channel allocas only when the file is
 * not indirectly addressed (indirect files use a flat array elsewhere).
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   for (idx = first; idx <= last; ++idx) {
      assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         assert(idx < LP_MAX_TGSI_TEMPS);
         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
         break;

      case TGSI_FILE_OUTPUT:
         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
         break;

      case TGSI_FILE_ADDRESS:
         /* ADDR registers are only allocated with an integer LLVM IR type,
          * as they are guaranteed to always have integers.
          * XXX: Not sure if this exception is worthwhile (or the whole idea of
          * an ADDR register for that matter).
          */
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
         break;

      case TGSI_FILE_PREDICATE:
         assert(idx < LP_MAX_TGSI_PREDS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
                                                 "predicate");
         break;

      case TGSI_FILE_SAMPLER_VIEW:
         /*
          * The target stored here MUST match whatever there actually
          * is in the set sampler views (what about return type?).
          */
         assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
         bld->sv[idx] = decl->SamplerView;
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }
}
2566
2567
2568 void lp_emit_immediate_soa(
2569 struct lp_build_tgsi_context *bld_base,
2570 const struct tgsi_full_immediate *imm)
2571 {
2572 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2573 struct gallivm_state * gallivm = bld_base->base.gallivm;
2574
2575 /* simply copy the immediate values into the next immediates[] slot */
2576 unsigned i;
2577 const uint size = imm->Immediate.NrTokens - 1;
2578 assert(size <= 4);
2579 assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
2580 switch (imm->Immediate.DataType) {
2581 case TGSI_IMM_FLOAT32:
2582 for( i = 0; i < size; ++i )
2583 bld->immediates[bld->num_immediates][i] =
2584 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2585
2586 break;
2587 case TGSI_IMM_UINT32:
2588 for( i = 0; i < size; ++i ) {
2589 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2590 bld->immediates[bld->num_immediates][i] =
2591 LLVMConstBitCast(tmp, bld_base->base.vec_type);
2592 }
2593
2594 break;
2595 case TGSI_IMM_INT32:
2596 for( i = 0; i < size; ++i ) {
2597 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2598 bld->immediates[bld->num_immediates][i] =
2599 LLVMConstBitCast(tmp, bld_base->base.vec_type);
2600 }
2601
2602 break;
2603 }
2604 for( i = size; i < 4; ++i )
2605 bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
2606
2607 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
2608 unsigned index = bld->num_immediates;
2609 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2610 LLVMBuilderRef builder = gallivm->builder;
2611 for (i = 0; i < 4; ++i ) {
2612 LLVMValueRef lindex = lp_build_const_int32(
2613 bld->bld_base.base.gallivm, index * 4 + i);
2614 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2615 bld->imms_array, &lindex, 1, "");
2616 LLVMBuildStore(builder,
2617 bld->immediates[index][i],
2618 imm_ptr);
2619 }
2620 }
2621
2622 bld->num_immediates++;
2623 }
2624
2625 static void
2626 ddx_emit(
2627 const struct lp_build_tgsi_action * action,
2628 struct lp_build_tgsi_context * bld_base,
2629 struct lp_build_emit_data * emit_data)
2630 {
2631 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2632
2633 emit_fetch_deriv(bld, emit_data->args[0], NULL,
2634 &emit_data->output[emit_data->chan], NULL);
2635 }
2636
2637 static void
2638 ddy_emit(
2639 const struct lp_build_tgsi_action * action,
2640 struct lp_build_tgsi_context * bld_base,
2641 struct lp_build_emit_data * emit_data)
2642 {
2643 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2644
2645 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
2646 &emit_data->output[emit_data->chan]);
2647 }
2648
2649 static void
2650 kill_emit(
2651 const struct lp_build_tgsi_action * action,
2652 struct lp_build_tgsi_context * bld_base,
2653 struct lp_build_emit_data * emit_data)
2654 {
2655 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2656
2657 emit_kill(bld, bld_base->pc - 1);
2658 }
2659
2660 static void
2661 kill_if_emit(
2662 const struct lp_build_tgsi_action * action,
2663 struct lp_build_tgsi_context * bld_base,
2664 struct lp_build_emit_data * emit_data)
2665 {
2666 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2667
2668 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
2669 }
2670
2671 static void
2672 tex_emit(
2673 const struct lp_build_tgsi_action * action,
2674 struct lp_build_tgsi_context * bld_base,
2675 struct lp_build_emit_data * emit_data)
2676 {
2677 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2678
2679 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
2680 }
2681
2682 static void
2683 txb_emit(
2684 const struct lp_build_tgsi_action * action,
2685 struct lp_build_tgsi_context * bld_base,
2686 struct lp_build_emit_data * emit_data)
2687 {
2688 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2689
2690 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2691 emit_data->output);
2692 }
2693
2694 static void
2695 txd_emit(
2696 const struct lp_build_tgsi_action * action,
2697 struct lp_build_tgsi_context * bld_base,
2698 struct lp_build_emit_data * emit_data)
2699 {
2700 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2701
2702 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2703 emit_data->output);
2704 }
2705
2706 static void
2707 txl_emit(
2708 const struct lp_build_tgsi_action * action,
2709 struct lp_build_tgsi_context * bld_base,
2710 struct lp_build_emit_data * emit_data)
2711 {
2712 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2713
2714 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2715 emit_data->output);
2716 }
2717
2718 static void
2719 txp_emit(
2720 const struct lp_build_tgsi_action * action,
2721 struct lp_build_tgsi_context * bld_base,
2722 struct lp_build_emit_data * emit_data)
2723 {
2724 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2725
2726 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
2727 emit_data->output);
2728 }
2729
2730 static void
2731 txq_emit(
2732 const struct lp_build_tgsi_action * action,
2733 struct lp_build_tgsi_context * bld_base,
2734 struct lp_build_emit_data * emit_data)
2735 {
2736 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2737
2738 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
2739 }
2740
2741 static void
2742 txf_emit(
2743 const struct lp_build_tgsi_action * action,
2744 struct lp_build_tgsi_context * bld_base,
2745 struct lp_build_emit_data * emit_data)
2746 {
2747 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2748
2749 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
2750 }
2751
2752 static void
2753 sample_i_emit(
2754 const struct lp_build_tgsi_action * action,
2755 struct lp_build_tgsi_context * bld_base,
2756 struct lp_build_emit_data * emit_data)
2757 {
2758 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2759
2760 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
2761 }
2762
2763 static void
2764 sample_emit(
2765 const struct lp_build_tgsi_action * action,
2766 struct lp_build_tgsi_context * bld_base,
2767 struct lp_build_emit_data * emit_data)
2768 {
2769 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2770
2771 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2772 FALSE, emit_data->output);
2773 }
2774
2775 static void
2776 sample_b_emit(
2777 const struct lp_build_tgsi_action * action,
2778 struct lp_build_tgsi_context * bld_base,
2779 struct lp_build_emit_data * emit_data)
2780 {
2781 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2782
2783 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2784 FALSE, emit_data->output);
2785 }
2786
2787 static void
2788 sample_c_emit(
2789 const struct lp_build_tgsi_action * action,
2790 struct lp_build_tgsi_context * bld_base,
2791 struct lp_build_emit_data * emit_data)
2792 {
2793 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2794
2795 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2796 TRUE, emit_data->output);
2797 }
2798
2799 static void
2800 sample_c_lz_emit(
2801 const struct lp_build_tgsi_action * action,
2802 struct lp_build_tgsi_context * bld_base,
2803 struct lp_build_emit_data * emit_data)
2804 {
2805 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2806
2807 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
2808 TRUE, emit_data->output);
2809 }
2810
2811 static void
2812 sample_d_emit(
2813 const struct lp_build_tgsi_action * action,
2814 struct lp_build_tgsi_context * bld_base,
2815 struct lp_build_emit_data * emit_data)
2816 {
2817 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2818
2819 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2820 FALSE, emit_data->output);
2821 }
2822
2823 static void
2824 sample_l_emit(
2825 const struct lp_build_tgsi_action * action,
2826 struct lp_build_tgsi_context * bld_base,
2827 struct lp_build_emit_data * emit_data)
2828 {
2829 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2830
2831 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2832 FALSE, emit_data->output);
2833 }
2834
2835 static void
2836 sviewinfo_emit(
2837 const struct lp_build_tgsi_action * action,
2838 struct lp_build_tgsi_context * bld_base,
2839 struct lp_build_emit_data * emit_data)
2840 {
2841 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2842
2843 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
2844 }
2845
2846 static LLVMValueRef
2847 mask_vec(struct lp_build_tgsi_context *bld_base)
2848 {
2849 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2850 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2851 struct lp_exec_mask *exec_mask = &bld->exec_mask;
2852
2853 if (!exec_mask->has_mask) {
2854 return lp_build_mask_value(bld->mask);
2855 }
2856 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
2857 exec_mask->exec_mask, "");
2858 }
2859
2860 static void
2861 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
2862 LLVMValueRef ptr,
2863 LLVMValueRef mask)
2864 {
2865 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2866 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2867
2868 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
2869
2870 LLVMBuildStore(builder, current_vec, ptr);
2871 }
2872
2873 static void
2874 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
2875 LLVMValueRef ptr,
2876 LLVMValueRef mask)
2877 {
2878 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2879 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2880
2881 current_vec = lp_build_select(&bld_base->uint_bld,
2882 mask,
2883 bld_base->uint_bld.zero,
2884 current_vec);
2885
2886 LLVMBuildStore(builder, current_vec, ptr);
2887 }
2888
2889 static LLVMValueRef
2890 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
2891 LLVMValueRef current_mask_vec,
2892 LLVMValueRef total_emitted_vertices_vec)
2893 {
2894 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2895 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
2896 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
2897 total_emitted_vertices_vec,
2898 bld->max_output_vertices_vec);
2899
2900 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
2901 }
2902
/**
 * TGSI_OPCODE_EMIT: emit one geometry shader vertex per active lane.
 *
 * Gathers the current output registers and hands them to the GS
 * interface, then bumps both the per-primitive and the total vertex
 * counters for the lanes that actually emitted (mask is clamped so
 * lanes at the declared max output vertex count emit nothing).
 */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      /* don't emit beyond the declared max_output_vertices */
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      /* active mask lanes are ~0 (-1); subtract-by-mask increments them */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
2936
2937
/**
 * Finish the current geometry shader primitive for the lanes active in
 * \p mask: notify the GS interface, bump the primitive counter and reset
 * the per-primitive vertex counter.  Lanes with zero unflushed vertices
 * are masked out so empty primitives are never recorded.
 */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      /* bump the primitive count, then reset the per-primitive
       * vertex count for the lanes that just finished */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
2990
2991 static void
2992 end_primitive(
2993 const struct lp_build_tgsi_action * action,
2994 struct lp_build_tgsi_context * bld_base,
2995 struct lp_build_emit_data * emit_data)
2996 {
2997 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2998
2999 if (bld->gs_iface->end_primitive) {
3000 LLVMValueRef mask = mask_vec(bld_base);
3001 end_primitive_masked(bld_base, mask);
3002 }
3003 }
3004
3005 static void
3006 cal_emit(
3007 const struct lp_build_tgsi_action * action,
3008 struct lp_build_tgsi_context * bld_base,
3009 struct lp_build_emit_data * emit_data)
3010 {
3011 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3012
3013 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3014 &bld_base->pc);
3015 }
3016
3017 static void
3018 ret_emit(
3019 const struct lp_build_tgsi_action * action,
3020 struct lp_build_tgsi_context * bld_base,
3021 struct lp_build_emit_data * emit_data)
3022 {
3023 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3024
3025 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3026 }
3027
3028 static void
3029 brk_emit(
3030 const struct lp_build_tgsi_action * action,
3031 struct lp_build_tgsi_context * bld_base,
3032 struct lp_build_emit_data * emit_data)
3033 {
3034 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3035
3036 lp_exec_break(&bld->exec_mask, bld_base);
3037 }
3038
3039 static void
3040 breakc_emit(
3041 const struct lp_build_tgsi_action * action,
3042 struct lp_build_tgsi_context * bld_base,
3043 struct lp_build_emit_data * emit_data)
3044 {
3045 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3046 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3047 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3048 LLVMValueRef unsigned_cond =
3049 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
3050 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3051 unsigned_cond,
3052 uint_bld->zero);
3053
3054 lp_exec_break_condition(&bld->exec_mask, cond);
3055 }
3056
3057 static void
3058 if_emit(
3059 const struct lp_build_tgsi_action * action,
3060 struct lp_build_tgsi_context * bld_base,
3061 struct lp_build_emit_data * emit_data)
3062 {
3063 LLVMValueRef tmp;
3064 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3065
3066 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3067 emit_data->args[0], bld->bld_base.base.zero);
3068 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3069 }
3070
3071 static void
3072 uif_emit(
3073 const struct lp_build_tgsi_action * action,
3074 struct lp_build_tgsi_context * bld_base,
3075 struct lp_build_emit_data * emit_data)
3076 {
3077 LLVMValueRef tmp;
3078 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3079 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3080
3081 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3082 emit_data->args[0], uint_bld->zero);
3083 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3084 }
3085
3086 static void
3087 case_emit(
3088 const struct lp_build_tgsi_action * action,
3089 struct lp_build_tgsi_context * bld_base,
3090 struct lp_build_emit_data * emit_data)
3091 {
3092 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3093
3094 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3095 }
3096
3097 static void
3098 default_emit(
3099 const struct lp_build_tgsi_action * action,
3100 struct lp_build_tgsi_context * bld_base,
3101 struct lp_build_emit_data * emit_data)
3102 {
3103 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3104
3105 lp_exec_default(&bld->exec_mask, bld_base);
3106 }
3107
3108 static void
3109 switch_emit(
3110 const struct lp_build_tgsi_action * action,
3111 struct lp_build_tgsi_context * bld_base,
3112 struct lp_build_emit_data * emit_data)
3113 {
3114 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3115
3116 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3117 }
3118
3119 static void
3120 endswitch_emit(
3121 const struct lp_build_tgsi_action * action,
3122 struct lp_build_tgsi_context * bld_base,
3123 struct lp_build_emit_data * emit_data)
3124 {
3125 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3126
3127 lp_exec_endswitch(&bld->exec_mask, bld_base);
3128 }
3129
3130 static void
3131 bgnloop_emit(
3132 const struct lp_build_tgsi_action * action,
3133 struct lp_build_tgsi_context * bld_base,
3134 struct lp_build_emit_data * emit_data)
3135 {
3136 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3137
3138 lp_exec_bgnloop(&bld->exec_mask);
3139 }
3140
3141 static void
3142 bgnsub_emit(
3143 const struct lp_build_tgsi_action * action,
3144 struct lp_build_tgsi_context * bld_base,
3145 struct lp_build_emit_data * emit_data)
3146 {
3147 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3148
3149 lp_exec_mask_bgnsub(&bld->exec_mask);
3150 }
3151
3152 static void
3153 else_emit(
3154 const struct lp_build_tgsi_action * action,
3155 struct lp_build_tgsi_context * bld_base,
3156 struct lp_build_emit_data * emit_data)
3157 {
3158 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3159
3160 lp_exec_mask_cond_invert(&bld->exec_mask);
3161 }
3162
3163 static void
3164 endif_emit(
3165 const struct lp_build_tgsi_action * action,
3166 struct lp_build_tgsi_context * bld_base,
3167 struct lp_build_emit_data * emit_data)
3168 {
3169 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3170
3171 lp_exec_mask_cond_pop(&bld->exec_mask);
3172 }
3173
3174 static void
3175 endloop_emit(
3176 const struct lp_build_tgsi_action * action,
3177 struct lp_build_tgsi_context * bld_base,
3178 struct lp_build_emit_data * emit_data)
3179 {
3180 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3181
3182 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3183 }
3184
3185 static void
3186 endsub_emit(
3187 const struct lp_build_tgsi_action * action,
3188 struct lp_build_tgsi_context * bld_base,
3189 struct lp_build_emit_data * emit_data)
3190 {
3191 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3192
3193 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3194 }
3195
3196 static void
3197 cont_emit(
3198 const struct lp_build_tgsi_action * action,
3199 struct lp_build_tgsi_context * bld_base,
3200 struct lp_build_emit_data * emit_data)
3201 {
3202 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3203
3204 lp_exec_continue(&bld->exec_mask);
3205 }
3206
3207 /* XXX: Refactor and move it to lp_bld_tgsi_action.c
3208 *
3209 * XXX: What do the comments about xmm registers mean? Maybe they are left over
3210 * from old code, but there is no garauntee that LLVM will use those registers
3211 * for this code.
3212 *
3213 * XXX: There should be no calls to lp_build_emit_fetch in this function. This
3214 * should be handled by the emit_data->fetch_args function. */
3215 static void
3216 nrm_emit(
3217 const struct lp_build_tgsi_action * action,
3218 struct lp_build_tgsi_context * bld_base,
3219 struct lp_build_emit_data * emit_data)
3220 {
3221 LLVMValueRef tmp0, tmp1;
3222 LLVMValueRef tmp4 = NULL;
3223 LLVMValueRef tmp5 = NULL;
3224 LLVMValueRef tmp6 = NULL;
3225 LLVMValueRef tmp7 = NULL;
3226 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3227
3228 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
3229
3230 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
3231 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
3232 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
3233 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
3234
3235 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
3236
3237 /* xmm4 = src.x */
3238 /* xmm0 = src.x * src.x */
3239 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
3240 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
3241 tmp4 = tmp0;
3242 }
3243 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
3244
3245 /* xmm5 = src.y */
3246 /* xmm0 = xmm0 + src.y * src.y */
3247 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
3248 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
3249 tmp5 = tmp1;
3250 }
3251 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
3252 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
3253
3254 /* xmm6 = src.z */
3255 /* xmm0 = xmm0 + src.z * src.z */
3256 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
3257 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
3258 tmp6 = tmp1;
3259 }
3260 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
3261 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
3262
3263 if (dims == 4) {
3264 /* xmm7 = src.w */
3265 /* xmm0 = xmm0 + src.w * src.w */
3266 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
3267 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
3268 tmp7 = tmp1;
3269 }
3270 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
3271 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
3272 }
3273 /* xmm1 = 1 / sqrt(xmm0) */
3274 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
3275 /* dst.x = xmm1 * src.x */
3276 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
3277 emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
3278 }
3279 /* dst.y = xmm1 * src.y */
3280 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
3281 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
3282 }
3283
3284 /* dst.z = xmm1 * src.z */
3285 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
3286 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
3287 }
3288 /* dst.w = xmm1 * src.w */
3289 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
3290 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
3291 }
3292 }
3293
3294 /* dst.w = 1.0 */
3295 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
3296 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
3297 }
3298 }
3299
/* Shader prologue: allocate stack arrays for any register file accessed
 * with indirect addressing (SSA values can't be indexed dynamically),
 * copy inputs into their array when needed, and set up the GS emit
 * counters. */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      /* file_max is the highest register index used; each register has
       * 4 channels, hence "* 4 + 4" vector elements. */
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                bld_base->base.vec_type, array_size,
                                                "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   /* (GS inputs are fetched through the gs_iface instead, so skip then.) */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      /* Store each declared input channel at its flattened index
       * (register * 4 + channel); missing channels are simply skipped. */
      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      /* Per-lane counters for EMIT/ENDPRIM bookkeeping, zero-initialized. */
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      /* Dump constants and inputs at shader entry when debugging. */
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}
3389
/* Shader epilogue: flush any pending GS vertices and report emit counts
 * through the gs_iface, or (non-GS) gather outputs from the indirect
 * alloca array back into the caller-provided output slots. */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the caller */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* implicit end_primitives, needed in case there are any unflushed
         vertices in the cache. Note must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* Hand the final per-lane counts to the driver-provided epilogue. */
      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      gather_outputs(bld);
   }
}
3427
/* Translate a TGSI token stream to LLVM IR in SoA form.
 *
 * Sets up the SoA build context (float/uint/int/elem sub-contexts), wires
 * the per-file fetch/store callbacks and per-opcode emit actions, then runs
 * the generic lp_build_tgsi_llvm() driver over the tokens.
 *
 * \param gallivm        gallivm/LLVM state to emit into
 * \param tokens         TGSI shader to translate
 * \param type           vector type for execution (lanes, width)
 * \param mask           execution mask context (may carry kill results)
 * \param consts_ptr     pointer to the constant buffers
 * \param const_sizes_ptr  sizes of the constant buffers
 * \param system_values  system value registers (copied into the context)
 * \param inputs         per-attrib, per-channel input values
 * \param outputs        per-attrib, per-channel output slots to fill
 * \param sampler        texture sampling code generator
 * \param info           shader info (register usage, properties)
 * \param gs_iface       geometry shader interface, or NULL for non-GS
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* Signed integer type with the same width/length as the float type. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;

   /* Per-register-file fetch/store hooks used by the generic translator. */
   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;

   if (gs_iface) {
      /* There's no specific value for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often
       * were forgetting so we're using MAX_VERTEX_VARYING from
       * that spec even though we could debug_assert if it's not
       * set, but that's a lot uglier. */
      uint max_output_vertices = 32;
      uint i = 0;
      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      /* Pick up the declared vertex limit from the shader, if present. */
      for (i = 0; i < info->num_properties; ++i) {
         if (info->properties[i].name ==
             TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
            max_output_vertices = info->properties[i].data[0];
         }
      }
      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   /* Debug aid: dump the TGSI and generated IR (disabled by default). */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   /* Debug aid: dump the whole module (disabled by default). */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);

   }
}