gallivm: implement switch opcode
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_tgsi_action.h"
51 #include "lp_bld_type.h"
52 #include "lp_bld_const.h"
53 #include "lp_bld_arit.h"
54 #include "lp_bld_bitarit.h"
55 #include "lp_bld_gather.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_logic.h"
58 #include "lp_bld_swizzle.h"
59 #include "lp_bld_flow.h"
60 #include "lp_bld_quad.h"
61 #include "lp_bld_tgsi.h"
62 #include "lp_bld_limits.h"
63 #include "lp_bld_debug.h"
64 #include "lp_bld_printf.h"
65 #include "lp_bld_sample.h"
66 #include "lp_bld_struct.h"
67
68 #define DUMP_GS_EMITS 0
69
70 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
71 {
72 LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
73 LLVMBuilderRef builder = bld->gallivm->builder;
74
75 mask->bld = bld;
76 mask->has_mask = FALSE;
77 mask->ret_in_main = FALSE;
78 mask->cond_stack_size = 0;
79 mask->loop_stack_size = 0;
80 mask->call_stack_size = 0;
81 mask->switch_stack_size = 0;
82
83 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
84 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
85 mask->cond_mask = mask->switch_mask =
86 LLVMConstAllOnes(mask->int_vec_type);
87
88 mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
89
90 LLVMBuildStore(
91 builder,
92 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
93 mask->loop_limiter);
94 }
95
96 static void lp_exec_mask_update(struct lp_exec_mask *mask)
97 {
98 LLVMBuilderRef builder = mask->bld->gallivm->builder;
99
100 if (mask->loop_stack_size) {
101 /*for loops we need to update the entire mask at runtime */
102 LLVMValueRef tmp;
103 assert(mask->break_mask);
104 tmp = LLVMBuildAnd(builder,
105 mask->cont_mask,
106 mask->break_mask,
107 "maskcb");
108 mask->exec_mask = LLVMBuildAnd(builder,
109 mask->cond_mask,
110 tmp,
111 "maskfull");
112 } else
113 mask->exec_mask = mask->cond_mask;
114
115 if (mask->switch_stack_size) {
116 mask->exec_mask = LLVMBuildAnd(builder,
117 mask->exec_mask,
118 mask->switch_mask,
119 "switchmask");
120 }
121
122 if (mask->call_stack_size || mask->ret_in_main) {
123 mask->exec_mask = LLVMBuildAnd(builder,
124 mask->exec_mask,
125 mask->ret_mask,
126 "callmask");
127 }
128
129 mask->has_mask = (mask->cond_stack_size > 0 ||
130 mask->loop_stack_size > 0 ||
131 mask->call_stack_size > 0 ||
132 mask->switch_stack_size > 0 ||
133 mask->ret_in_main);
134 }
135
136 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
137 LLVMValueRef val)
138 {
139 LLVMBuilderRef builder = mask->bld->gallivm->builder;
140
141 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
142 if (mask->cond_stack_size == 0) {
143 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
144 }
145 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
146 assert(LLVMTypeOf(val) == mask->int_vec_type);
147 mask->cond_mask = LLVMBuildAnd(builder,
148 mask->cond_mask,
149 val,
150 "");
151 lp_exec_mask_update(mask);
152 }
153
154 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
155 {
156 LLVMBuilderRef builder = mask->bld->gallivm->builder;
157 LLVMValueRef prev_mask;
158 LLVMValueRef inv_mask;
159
160 assert(mask->cond_stack_size);
161 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
162 if (mask->cond_stack_size == 1) {
163 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
164 }
165
166 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
167
168 mask->cond_mask = LLVMBuildAnd(builder,
169 inv_mask,
170 prev_mask, "");
171 lp_exec_mask_update(mask);
172 }
173
174 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
175 {
176 assert(mask->cond_stack_size);
177 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
178 lp_exec_mask_update(mask);
179 }
180
181 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
182 {
183 LLVMBuilderRef builder = mask->bld->gallivm->builder;
184
185 if (mask->loop_stack_size == 0) {
186 assert(mask->loop_block == NULL);
187 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
188 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
189 assert(mask->break_var == NULL);
190 }
191
192 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
193
194 mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
195 mask->break_type;
196 mask->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
197
198 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
199 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
200 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
201 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
202 ++mask->loop_stack_size;
203
204 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
205 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
206
207 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
208
209 LLVMBuildBr(builder, mask->loop_block);
210 LLVMPositionBuilderAtEnd(builder, mask->loop_block);
211
212 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
213
214 lp_exec_mask_update(mask);
215 }
216
217 static void lp_exec_break(struct lp_exec_mask *mask,
218 struct lp_build_tgsi_context * bld_base)
219 {
220 LLVMBuilderRef builder = mask->bld->gallivm->builder;
221
222 if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
223 LLVMValueRef exec_mask = LLVMBuildNot(builder,
224 mask->exec_mask,
225 "break");
226
227 mask->break_mask = LLVMBuildAnd(builder,
228 mask->break_mask,
229 exec_mask, "break_full");
230 }
231 else {
232 unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
233 boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
234 opcode == TGSI_OPCODE_CASE);
235
236
237 if (mask->switch_in_default) {
238 /*
239 * stop default execution but only if this is an unconditional switch.
240 * (The condition here is not perfect since dead code after break is
241 * allowed but should be sufficient since false negatives are just
242 * unoptimized - so we don't have to pre-evaluate that).
243 */
244 if(break_always && mask->switch_pc) {
245 bld_base->pc = mask->switch_pc;
246 return;
247 }
248 }
249
250 if (break_always) {
251 mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
252 }
253 else {
254 LLVMValueRef exec_mask = LLVMBuildNot(builder,
255 mask->exec_mask,
256 "break");
257 mask->switch_mask = LLVMBuildAnd(builder,
258 mask->switch_mask,
259 exec_mask, "break_switch");
260 }
261 }
262
263 lp_exec_mask_update(mask);
264 }
265
266 static void lp_exec_break_condition(struct lp_exec_mask *mask,
267 LLVMValueRef cond)
268 {
269 LLVMBuilderRef builder = mask->bld->gallivm->builder;
270 LLVMValueRef cond_mask = LLVMBuildAnd(builder,
271 mask->exec_mask,
272 cond, "cond_mask");
273 cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
274
275 if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
276 mask->break_mask = LLVMBuildAnd(builder,
277 mask->break_mask,
278 cond_mask, "breakc_full");
279 }
280 else {
281 mask->switch_mask = LLVMBuildAnd(builder,
282 mask->switch_mask,
283 cond_mask, "breakc_switch");
284 }
285
286 lp_exec_mask_update(mask);
287 }
288
289 static void lp_exec_continue(struct lp_exec_mask *mask)
290 {
291 LLVMBuilderRef builder = mask->bld->gallivm->builder;
292 LLVMValueRef exec_mask = LLVMBuildNot(builder,
293 mask->exec_mask,
294 "");
295
296 mask->cont_mask = LLVMBuildAnd(builder,
297 mask->cont_mask,
298 exec_mask, "");
299
300 lp_exec_mask_update(mask);
301 }
302
303
304 static void lp_exec_endloop(struct gallivm_state *gallivm,
305 struct lp_exec_mask *mask)
306 {
307 LLVMBuilderRef builder = mask->bld->gallivm->builder;
308 LLVMBasicBlockRef endloop;
309 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
310 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
311 mask->bld->type.width *
312 mask->bld->type.length);
313 LLVMValueRef i1cond, i2cond, icond, limiter;
314
315 assert(mask->break_mask);
316
317 /*
318 * Restore the cont_mask, but don't pop
319 */
320 assert(mask->loop_stack_size);
321 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
322 lp_exec_mask_update(mask);
323
324 /*
325 * Unlike the continue mask, the break_mask must be preserved across loop
326 * iterations
327 */
328 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
329
330 /* Decrement the loop limiter */
331 limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
332
333 limiter = LLVMBuildSub(
334 builder,
335 limiter,
336 LLVMConstInt(int_type, 1, false),
337 "");
338
339 LLVMBuildStore(builder, limiter, mask->loop_limiter);
340
341 /* i1cond = (mask != 0) */
342 i1cond = LLVMBuildICmp(
343 builder,
344 LLVMIntNE,
345 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
346 LLVMConstNull(reg_type), "i1cond");
347
348 /* i2cond = (looplimiter > 0) */
349 i2cond = LLVMBuildICmp(
350 builder,
351 LLVMIntSGT,
352 limiter,
353 LLVMConstNull(int_type), "i2cond");
354
355 /* if( i1cond && i2cond ) */
356 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
357
358 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
359
360 LLVMBuildCondBr(builder,
361 icond, mask->loop_block, endloop);
362
363 LLVMPositionBuilderAtEnd(builder, endloop);
364
365 assert(mask->loop_stack_size);
366 --mask->loop_stack_size;
367 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
368 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
369 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
370 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
371 mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
372
373 lp_exec_mask_update(mask);
374 }
375
376 static void lp_exec_switch(struct lp_exec_mask *mask,
377 LLVMValueRef switchval)
378 {
379 mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
380 mask->break_type;
381 mask->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
382
383 mask->switch_stack[mask->switch_stack_size].switch_val = mask->switch_val;
384 mask->switch_stack[mask->switch_stack_size].switch_mask = mask->switch_mask;
385 mask->switch_stack[mask->switch_stack_size].switch_mask_default = mask->switch_mask_default;
386 mask->switch_stack[mask->switch_stack_size].switch_in_default = mask->switch_in_default;
387 mask->switch_stack[mask->switch_stack_size].switch_pc = mask->switch_pc;
388 mask->switch_stack_size++;
389
390 mask->switch_val = switchval;
391 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
392 mask->switch_mask_default = LLVMConstNull(mask->int_vec_type);
393 mask->switch_in_default = false;
394 mask->switch_pc = 0;
395
396 lp_exec_mask_update(mask);
397 }
398
399 static void lp_exec_endswitch(struct lp_exec_mask *mask,
400 struct lp_build_tgsi_context * bld_base)
401 {
402 LLVMBuilderRef builder = mask->bld->gallivm->builder;
403
404 /* check if there's deferred default if so do it now */
405 if (mask->switch_pc && !mask->switch_in_default) {
406 LLVMValueRef prevmask, defaultmask;
407 unsigned tmp_pc;
408 prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
409 defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
410 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
411 mask->switch_in_default = true;
412
413 lp_exec_mask_update(mask);
414
415 assert(bld_base->instructions[mask->switch_pc - 1].Instruction.Opcode ==
416 TGSI_OPCODE_DEFAULT);
417
418 tmp_pc = bld_base->pc;
419 bld_base->pc = mask->switch_pc;
420 /*
421 * re-purpose switch_pc to point to here again, since we stop execution of
422 * the deferred default after next break.
423 */
424 mask->switch_pc = tmp_pc - 1;
425
426 return;
427 }
428
429 else if (mask->switch_pc && mask->switch_in_default) {
430 assert(bld_base->pc == mask->switch_pc + 1);
431 }
432
433 mask->switch_stack_size--;
434 mask->switch_val = mask->switch_stack[mask->switch_stack_size].switch_val;
435 mask->switch_mask = mask->switch_stack[mask->switch_stack_size].switch_mask;
436 mask->switch_mask_default = mask->switch_stack[mask->switch_stack_size].switch_mask_default;
437 mask->switch_in_default = mask->switch_stack[mask->switch_stack_size].switch_in_default;
438 mask->switch_pc = mask->switch_stack[mask->switch_stack_size].switch_pc;
439
440 mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
441
442 lp_exec_mask_update(mask);
443 }
444
445 static void lp_exec_case(struct lp_exec_mask *mask,
446 LLVMValueRef caseval)
447 {
448 LLVMBuilderRef builder = mask->bld->gallivm->builder;
449
450 LLVMValueRef casemask, prevmask;
451
452 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
453 if (!mask->switch_in_default) {
454 prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
455 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, mask->switch_val);
456 mask->switch_mask_default = LLVMBuildOr(builder, casemask,
457 mask->switch_mask_default, "sw_default_mask");
458 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
459 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
460
461 lp_exec_mask_update(mask);
462 }
463 }
464
465 /*
466 * Analyse default statement in a switch.
467 * \return true if default is last statement, false otherwise
468 * \param default_pc_start contains pc of instruction to jump to
469 * if default wasn't last but there's no
470 * fallthrough into default.
471 */
472 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
473 struct lp_build_tgsi_context * bld_base,
474 int *default_pc_start)
475 {
476 unsigned pc = bld_base->pc;
477 unsigned curr_switch_stack = mask->switch_stack_size;
478
479 /* skip over case statements which are together with default */
480 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
481 pc++;
482 }
483
484 while (pc != -1 && pc < bld_base->num_instructions) {
485 unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
486 switch (opcode) {
487 case TGSI_OPCODE_CASE:
488 if (curr_switch_stack == mask->switch_stack_size) {
489 *default_pc_start = pc - 1;
490 return false;
491 }
492 break;
493 case TGSI_OPCODE_SWITCH:
494 curr_switch_stack++;
495 break;
496 case TGSI_OPCODE_ENDSWITCH:
497 if (curr_switch_stack == mask->switch_stack_size) {
498 *default_pc_start = pc - 1;
499 return true;
500 }
501 curr_switch_stack--;
502 break;
503 }
504 pc++;
505 }
506 /* should never arrive here */
507 assert(0);
508 return true;
509 }
510
511 static void lp_exec_default(struct lp_exec_mask *mask,
512 struct lp_build_tgsi_context * bld_base)
513 {
514 LLVMBuilderRef builder = mask->bld->gallivm->builder;
515
516 int default_exec_pc;
517 boolean default_is_last;
518
519 /*
520 * This is a messy opcode, because it may not be always at the end and
521 * there can be fallthrough in and out of it.
522 */
523
524 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
525 /*
526 * If it is last statement in switch (note that case statements appearing
527 * "at the same time" as default don't change that) everything is just fine,
528 * update switch mask and go on. This means we can handle default with
529 * fallthrough INTO it without overhead, if it is last.
530 */
531 if (default_is_last) {
532 LLVMValueRef prevmask, defaultmask;
533 prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
534 defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
535 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
536 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
537 mask->switch_in_default = true;
538
539 lp_exec_mask_update(mask);
540 }
541 else {
542 /*
543 * Technically, "case" immediately before default isn't really a
544 * fallthrough, however we still have to count them as such as we
545 * already have updated the masks.
546 * If that happens in practice could add a switch optimizer pass
547 * which just gets rid of all case statements appearing together with
548 * default (or could do switch analysis at switch start time instead).
549 */
550 unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
551 boolean ft_into = (opcode != TGSI_OPCODE_BRK ||
552 opcode != TGSI_OPCODE_SWITCH);
553 /*
554 * If it is not last statement and there was no fallthrough into it,
555 * we record the PC and continue execution at next case (again, those
556 * case encountered at the same time don't count). At endswitch
557 * time, we update switchmask, and go back executing the code we skipped
558 * until the next break (possibly re-executing some code with changed mask
559 * if there was a fallthrough out of default).
560 * Finally, if it is not last statement and there was a fallthrough into it,
561 * do the same as with the former case, except instead of skipping the code
562 * just execute it without updating the mask, then go back and re-execute.
563 */
564 mask->switch_pc = bld_base->pc;
565 if (!ft_into) {
566 bld_base->pc = default_exec_pc;
567 }
568 }
569 }
570
571
572 /* stores val into an address pointed to by dst.
573 * mask->exec_mask is used to figure out which bits of val
574 * should be stored into the address
575 * (0 means don't store this bit, 1 means do store).
576 */
577 static void lp_exec_mask_store(struct lp_exec_mask *mask,
578 struct lp_build_context *bld_store,
579 LLVMValueRef pred,
580 LLVMValueRef val,
581 LLVMValueRef dst)
582 {
583 LLVMBuilderRef builder = mask->bld->gallivm->builder;
584
585 /* Mix the predicate and execution mask */
586 if (mask->has_mask) {
587 if (pred) {
588 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
589 } else {
590 pred = mask->exec_mask;
591 }
592 }
593
594 if (pred) {
595 LLVMValueRef real_val, dst_val;
596
597 dst_val = LLVMBuildLoad(builder, dst, "");
598 real_val = lp_build_select(bld_store,
599 pred,
600 val, dst_val);
601
602 LLVMBuildStore(builder, real_val, dst);
603 } else
604 LLVMBuildStore(builder, val, dst);
605 }
606
607 static void lp_exec_mask_call(struct lp_exec_mask *mask,
608 int func,
609 int *pc)
610 {
611 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
612 mask->call_stack[mask->call_stack_size].pc = *pc;
613 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
614 mask->call_stack_size++;
615 *pc = func;
616 }
617
618 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
619 {
620 LLVMBuilderRef builder = mask->bld->gallivm->builder;
621 LLVMValueRef exec_mask;
622
623 if (mask->cond_stack_size == 0 &&
624 mask->loop_stack_size == 0 &&
625 mask->switch_stack_size == 0 &&
626 mask->call_stack_size == 0) {
627 /* returning from main() */
628 *pc = -1;
629 return;
630 }
631
632 if (mask->call_stack_size == 0) {
633 /*
634 * This requires special handling since we need to ensure
635 * we don't drop the mask even if we have no call stack
636 * (e.g. after a ret in a if clause after the endif)
637 */
638 mask->ret_in_main = TRUE;
639 }
640
641 exec_mask = LLVMBuildNot(builder,
642 mask->exec_mask,
643 "ret");
644
645 mask->ret_mask = LLVMBuildAnd(builder,
646 mask->ret_mask,
647 exec_mask, "ret_full");
648
649 lp_exec_mask_update(mask);
650 }
651
/**
 * Counterpart of lp_exec_mask_endsub() (going by its name, called at the
 * start of a subroutine). It performs no work.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
655
656 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
657 {
658 assert(mask->call_stack_size);
659 mask->call_stack_size--;
660 *pc = mask->call_stack[mask->call_stack_size].pc;
661 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
662 lp_exec_mask_update(mask);
663 }
664
665
666 /**
667 * Return pointer to a temporary register channel (src or dest).
668 * Note that indirect addressing cannot be handled here.
669 * \param index which temporary register
670 * \param chan which channel of the temp register.
671 */
672 LLVMValueRef
673 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
674 unsigned index,
675 unsigned chan)
676 {
677 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
678 assert(chan < 4);
679 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
680 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
681 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
682 }
683 else {
684 return bld->temps[index][chan];
685 }
686 }
687
688 /**
689 * Return pointer to a output register channel (src or dest).
690 * Note that indirect addressing cannot be handled here.
691 * \param index which output register
692 * \param chan which channel of the output register.
693 */
694 LLVMValueRef
695 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
696 unsigned index,
697 unsigned chan)
698 {
699 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
700 assert(chan < 4);
701 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
702 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
703 index * 4 + chan);
704 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
705 }
706 else {
707 return bld->outputs[index][chan];
708 }
709 }
710
711 /*
712 * If we have indirect addressing in outputs copy our alloca array
713 * to the outputs slots specified by the caller to make sure
714 * our outputs are delivered consistently via the same interface.
715 */
716 static void
717 gather_outputs(struct lp_build_tgsi_soa_context * bld)
718 {
719 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
720 unsigned index, chan;
721 assert(bld->bld_base.info->num_outputs <=
722 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
723 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
724 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
725 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
726 }
727 }
728 }
729 }
730
731 /**
732 * Gather vector.
733 * XXX the lp_build_gather() function should be capable of doing this
734 * with a little work.
735 */
736 static LLVMValueRef
737 build_gather(struct lp_build_context *bld,
738 LLVMValueRef base_ptr,
739 LLVMValueRef indexes)
740 {
741 LLVMBuilderRef builder = bld->gallivm->builder;
742 LLVMValueRef res = bld->undef;
743 unsigned i;
744
745 /*
746 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
747 */
748 for (i = 0; i < bld->type.length; i++) {
749 LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
750 LLVMValueRef index = LLVMBuildExtractElement(builder,
751 indexes, ii, "");
752 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
753 &index, 1, "gather_ptr");
754 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
755
756 res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
757 }
758
759 return res;
760 }
761
762
763 /**
764 * Scatter/store vector.
765 */
766 static void
767 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
768 LLVMValueRef base_ptr,
769 LLVMValueRef indexes,
770 LLVMValueRef values,
771 struct lp_exec_mask *mask,
772 LLVMValueRef pred)
773 {
774 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
775 LLVMBuilderRef builder = gallivm->builder;
776 unsigned i;
777
778 /* Mix the predicate and execution mask */
779 if (mask->has_mask) {
780 if (pred) {
781 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
782 }
783 else {
784 pred = mask->exec_mask;
785 }
786 }
787
788 /*
789 * Loop over elements of index_vec, store scalar value.
790 */
791 for (i = 0; i < bld->bld_base.base.type.length; i++) {
792 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
793 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
794 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
795 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
796 LLVMValueRef scalar_pred = pred ?
797 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
798
799 if (0)
800 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
801 ii, val, index, scalar_ptr);
802
803 if (scalar_pred) {
804 LLVMValueRef real_val, dst_val;
805 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
806 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
807 LLVMBuildStore(builder, real_val, scalar_ptr);
808 }
809 else {
810 LLVMBuildStore(builder, val, scalar_ptr);
811 }
812 }
813 }
814
815
816 /**
817 * Read the current value of the ADDR register, convert the floats to
818 * ints, add the base index and return the vector of offsets.
819 * The offsets will be used to index into the constant buffer or
820 * temporary register file.
821 */
822 static LLVMValueRef
823 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
824 unsigned reg_file, unsigned reg_index,
825 const struct tgsi_ind_register *indirect_reg)
826 {
827 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
828 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
829 /* always use X component of address register */
830 unsigned swizzle = indirect_reg->Swizzle;
831 LLVMValueRef base;
832 LLVMValueRef rel;
833 LLVMValueRef max_index;
834 LLVMValueRef index;
835
836 assert(bld->indirect_files & (1 << reg_file));
837
838 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
839
840 assert(swizzle < 4);
841 switch (indirect_reg->File) {
842 case TGSI_FILE_ADDRESS:
843 rel = LLVMBuildLoad(builder,
844 bld->addr[indirect_reg->Index][swizzle],
845 "load addr reg");
846 /* ADDR LLVM values already have LLVM integer type. */
847 break;
848 case TGSI_FILE_TEMPORARY:
849 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
850 rel = LLVMBuildLoad(builder, rel, "load temp reg");
851 /* TEMP LLVM values always have LLVM float type, but for indirection, the
852 * value actually stored is expected to be an integer */
853 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
854 break;
855 default:
856 assert(0);
857 rel = uint_bld->zero;
858 }
859
860 index = lp_build_add(uint_bld, base, rel);
861
862 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
863 uint_bld->type,
864 bld->bld_base.info->file_max[reg_file]);
865
866 assert(!uint_bld->type.sign);
867 index = lp_build_min(uint_bld, index, max_index);
868
869 return index;
870 }
871
872 static struct lp_build_context *
873 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
874 enum tgsi_opcode_type stype)
875 {
876 struct lp_build_context *bld_fetch;
877
878 switch (stype) {
879 case TGSI_TYPE_FLOAT:
880 case TGSI_TYPE_UNTYPED:
881 bld_fetch = &bld_base->base;
882 break;
883 case TGSI_TYPE_UNSIGNED:
884 bld_fetch = &bld_base->uint_bld;
885 break;
886 case TGSI_TYPE_SIGNED:
887 bld_fetch = &bld_base->int_bld;
888 break;
889 case TGSI_TYPE_VOID:
890 case TGSI_TYPE_DOUBLE:
891 default:
892 assert(0);
893 bld_fetch = NULL;
894 break;
895 }
896 return bld_fetch;
897 }
898
899 static LLVMValueRef
900 emit_fetch_constant(
901 struct lp_build_tgsi_context * bld_base,
902 const struct tgsi_full_src_register * reg,
903 enum tgsi_opcode_type stype,
904 unsigned swizzle)
905 {
906 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
907 struct gallivm_state *gallivm = bld_base->base.gallivm;
908 LLVMBuilderRef builder = gallivm->builder;
909 struct lp_build_context *uint_bld = &bld_base->uint_bld;
910 LLVMValueRef indirect_index = NULL;
911 unsigned dimension = 0;
912 LLVMValueRef dimension_index;
913 LLVMValueRef consts_ptr;
914 LLVMValueRef res;
915
916 /* XXX: Handle fetching xyzw components as a vector */
917 assert(swizzle != ~0);
918
919 if (reg->Register.Dimension) {
920 assert(!reg->Dimension.Indirect);
921 dimension = reg->Dimension.Index;
922 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
923 }
924
925 dimension_index = lp_build_const_int32(gallivm, dimension);
926 consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);
927
928 if (reg->Register.Indirect) {
929 indirect_index = get_indirect_index(bld,
930 reg->Register.File,
931 reg->Register.Index,
932 &reg->Indirect);
933 }
934
935 if (reg->Register.Indirect) {
936 LLVMValueRef swizzle_vec =
937 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
938 LLVMValueRef index_vec; /* index into the const buffer */
939
940 /* index_vec = indirect_index * 4 + swizzle */
941 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
942 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
943
944 /* Gather values from the constant buffer */
945 res = build_gather(&bld_base->base, consts_ptr, index_vec);
946 }
947 else {
948 LLVMValueRef index; /* index into the const buffer */
949 LLVMValueRef scalar, scalar_ptr;
950
951 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
952
953 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
954 &index, 1, "");
955 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
956 res = lp_build_broadcast_scalar(&bld_base->base, scalar);
957 }
958
959 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
960 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
961 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
962 }
963 return res;
964 }
965
966 static LLVMValueRef
967 emit_fetch_immediate(
968 struct lp_build_tgsi_context * bld_base,
969 const struct tgsi_full_src_register * reg,
970 enum tgsi_opcode_type stype,
971 unsigned swizzle)
972 {
973 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
974 LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
975 assert(res);
976
977 if (stype == TGSI_TYPE_UNSIGNED) {
978 res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
979 } else if (stype == TGSI_TYPE_SIGNED) {
980 res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
981 }
982 return res;
983 }
984
985 static LLVMValueRef
986 emit_fetch_input(
987 struct lp_build_tgsi_context * bld_base,
988 const struct tgsi_full_src_register * reg,
989 enum tgsi_opcode_type stype,
990 unsigned swizzle)
991 {
992 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
993 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
994 LLVMBuilderRef builder = gallivm->builder;
995 struct lp_build_context *uint_bld = &bld_base->uint_bld;
996 LLVMValueRef indirect_index = NULL;
997 LLVMValueRef res;
998
999 if (reg->Register.Indirect) {
1000 indirect_index = get_indirect_index(bld,
1001 reg->Register.File,
1002 reg->Register.Index,
1003 &reg->Indirect);
1004 }
1005
1006 if (reg->Register.Indirect) {
1007 LLVMValueRef swizzle_vec =
1008 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1009 LLVMValueRef length_vec =
1010 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
1011 LLVMValueRef index_vec; /* index into the const buffer */
1012 LLVMValueRef inputs_array;
1013 LLVMTypeRef float4_ptr_type;
1014
1015 /* index_vec = (indirect_index * 4 + swizzle) * length */
1016 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1017 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1018 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1019
1020 /* cast inputs_array pointer to float* */
1021 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1022 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
1023 float4_ptr_type, "");
1024
1025 /* Gather values from the temporary register array */
1026 res = build_gather(&bld_base->base, inputs_array, index_vec);
1027 } else {
1028 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1029 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1030 reg->Register.Index * 4 + swizzle);
1031 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1032 bld->inputs_array, &lindex, 1, "");
1033 res = LLVMBuildLoad(builder, input_ptr, "");
1034 }
1035 else {
1036 res = bld->inputs[reg->Register.Index][swizzle];
1037 }
1038 }
1039
1040 assert(res);
1041
1042 if (stype == TGSI_TYPE_UNSIGNED) {
1043 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1044 } else if (stype == TGSI_TYPE_SIGNED) {
1045 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1046 }
1047
1048 return res;
1049 }
1050
1051
1052 static LLVMValueRef
1053 emit_fetch_gs_input(
1054 struct lp_build_tgsi_context * bld_base,
1055 const struct tgsi_full_src_register * reg,
1056 enum tgsi_opcode_type stype,
1057 unsigned swizzle)
1058 {
1059 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1060 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1061 LLVMBuilderRef builder = gallivm->builder;
1062 LLVMValueRef attrib_index = NULL;
1063 LLVMValueRef vertex_index = NULL;
1064 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1065 LLVMValueRef res;
1066
1067 if (reg->Register.Indirect) {
1068 attrib_index = get_indirect_index(bld,
1069 reg->Register.File,
1070 reg->Register.Index,
1071 &reg->Indirect);
1072 } else {
1073 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1074 }
1075
1076 if (reg->Dimension.Indirect) {
1077 vertex_index = get_indirect_index(bld,
1078 reg->Register.File,
1079 reg->Dimension.Index,
1080 &reg->DimIndirect);
1081 } else {
1082 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1083 }
1084
1085 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1086 reg->Dimension.Indirect,
1087 vertex_index, attrib_index,
1088 swizzle_index);
1089
1090 assert(res);
1091
1092 if (stype == TGSI_TYPE_UNSIGNED) {
1093 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1094 } else if (stype == TGSI_TYPE_SIGNED) {
1095 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1096 }
1097
1098 return res;
1099 }
1100
1101 static LLVMValueRef
1102 emit_fetch_temporary(
1103 struct lp_build_tgsi_context * bld_base,
1104 const struct tgsi_full_src_register * reg,
1105 enum tgsi_opcode_type stype,
1106 unsigned swizzle)
1107 {
1108 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1109 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1110 LLVMBuilderRef builder = gallivm->builder;
1111 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1112 LLVMValueRef indirect_index = NULL;
1113 LLVMValueRef res;
1114
1115 if (reg->Register.Indirect) {
1116 indirect_index = get_indirect_index(bld,
1117 reg->Register.File,
1118 reg->Register.Index,
1119 &reg->Indirect);
1120 }
1121
1122 if (reg->Register.Indirect) {
1123 LLVMValueRef swizzle_vec =
1124 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
1125 LLVMValueRef length_vec =
1126 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
1127 bld->bld_base.base.type.length);
1128 LLVMValueRef index_vec; /* index into the const buffer */
1129 LLVMValueRef temps_array;
1130 LLVMTypeRef float4_ptr_type;
1131
1132 /* index_vec = (indirect_index * 4 + swizzle) * length */
1133 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1134 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1135 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1136
1137 /* cast temps_array pointer to float* */
1138 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
1139 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1140 float4_ptr_type, "");
1141
1142 /* Gather values from the temporary register array */
1143 res = build_gather(&bld_base->base, temps_array, index_vec);
1144 }
1145 else {
1146 LLVMValueRef temp_ptr;
1147 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1148 res = LLVMBuildLoad(builder, temp_ptr, "");
1149 }
1150
1151 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
1152 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1153 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1154 }
1155
1156 return res;
1157 }
1158
1159 static LLVMValueRef
1160 emit_fetch_system_value(
1161 struct lp_build_tgsi_context * bld_base,
1162 const struct tgsi_full_src_register * reg,
1163 enum tgsi_opcode_type stype,
1164 unsigned swizzle)
1165 {
1166 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1167 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1168 const struct tgsi_shader_info *info = bld->bld_base.info;
1169 LLVMBuilderRef builder = gallivm->builder;
1170 LLVMValueRef res;
1171 enum tgsi_opcode_type atype; // Actual type of the value
1172
1173 assert(!reg->Register.Indirect);
1174
1175 switch (info->system_value_semantic_name[reg->Register.Index]) {
1176 case TGSI_SEMANTIC_INSTANCEID:
1177 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1178 atype = TGSI_TYPE_UNSIGNED;
1179 break;
1180
1181 case TGSI_SEMANTIC_VERTEXID:
1182 res = bld->system_values.vertex_id;
1183 atype = TGSI_TYPE_UNSIGNED;
1184 break;
1185
1186 case TGSI_SEMANTIC_PRIMID:
1187 res = bld->system_values.prim_id;
1188 atype = TGSI_TYPE_UNSIGNED;
1189 break;
1190
1191 default:
1192 assert(!"unexpected semantic in emit_fetch_system_value");
1193 res = bld_base->base.zero;
1194 atype = TGSI_TYPE_FLOAT;
1195 break;
1196 }
1197
1198 if (atype != stype) {
1199 if (stype == TGSI_TYPE_FLOAT) {
1200 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1201 } else if (stype == TGSI_TYPE_UNSIGNED) {
1202 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1203 } else if (stype == TGSI_TYPE_SIGNED) {
1204 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1205 }
1206 }
1207
1208 return res;
1209 }
1210
1211 /**
1212 * Register fetch with derivatives.
1213 */
1214 static void
1215 emit_fetch_deriv(
1216 struct lp_build_tgsi_soa_context *bld,
1217 LLVMValueRef src,
1218 LLVMValueRef *res,
1219 LLVMValueRef *ddx,
1220 LLVMValueRef *ddy)
1221 {
1222 if(res)
1223 *res = src;
1224
1225 /* TODO: use interpolation coeffs for inputs */
1226
1227 if(ddx)
1228 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1229
1230 if(ddy)
1231 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1232 }
1233
1234
1235 /**
1236 * Predicate.
1237 */
1238 static void
1239 emit_fetch_predicate(
1240 struct lp_build_tgsi_soa_context *bld,
1241 const struct tgsi_full_instruction *inst,
1242 LLVMValueRef *pred)
1243 {
1244 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1245 unsigned index;
1246 unsigned char swizzles[4];
1247 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
1248 LLVMValueRef value;
1249 unsigned chan;
1250
1251 if (!inst->Instruction.Predicate) {
1252 TGSI_FOR_EACH_CHANNEL( chan ) {
1253 pred[chan] = NULL;
1254 }
1255 return;
1256 }
1257
1258 swizzles[0] = inst->Predicate.SwizzleX;
1259 swizzles[1] = inst->Predicate.SwizzleY;
1260 swizzles[2] = inst->Predicate.SwizzleZ;
1261 swizzles[3] = inst->Predicate.SwizzleW;
1262
1263 index = inst->Predicate.Index;
1264 assert(index < LP_MAX_TGSI_PREDS);
1265
1266 TGSI_FOR_EACH_CHANNEL( chan ) {
1267 unsigned swizzle = swizzles[chan];
1268
1269 /*
1270 * Only fetch the predicate register channels that are actually listed
1271 * in the swizzles
1272 */
1273 if (!unswizzled[swizzle]) {
1274 value = LLVMBuildLoad(builder,
1275 bld->preds[index][swizzle], "");
1276
1277 /*
1278 * Convert the value to an integer mask.
1279 *
1280 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
1281 * is needlessly causing two comparisons due to storing the intermediate
1282 * result as float vector instead of an integer mask vector.
1283 */
1284 value = lp_build_compare(bld->bld_base.base.gallivm,
1285 bld->bld_base.base.type,
1286 PIPE_FUNC_NOTEQUAL,
1287 value,
1288 bld->bld_base.base.zero);
1289 if (inst->Predicate.Negate) {
1290 value = LLVMBuildNot(builder, value, "");
1291 }
1292
1293 unswizzled[swizzle] = value;
1294 } else {
1295 value = unswizzled[swizzle];
1296 }
1297
1298 pred[chan] = value;
1299 }
1300 }
1301
1302 /**
1303 * Register store.
1304 */
1305 static void
1306 emit_store_chan(
1307 struct lp_build_tgsi_context *bld_base,
1308 const struct tgsi_full_instruction *inst,
1309 unsigned index,
1310 unsigned chan_index,
1311 LLVMValueRef pred,
1312 LLVMValueRef value)
1313 {
1314 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1315 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1316 LLVMBuilderRef builder = gallivm->builder;
1317 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1318 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1319 LLVMValueRef indirect_index = NULL;
1320 struct lp_build_context *bld_store;
1321 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1322
1323 switch (dtype) {
1324 default:
1325 case TGSI_TYPE_FLOAT:
1326 case TGSI_TYPE_UNTYPED:
1327 bld_store = &bld_base->base;
1328 break;
1329 case TGSI_TYPE_UNSIGNED:
1330 bld_store = &bld_base->uint_bld;
1331 break;
1332 case TGSI_TYPE_SIGNED:
1333 bld_store = &bld_base->int_bld;
1334 break;
1335 case TGSI_TYPE_DOUBLE:
1336 case TGSI_TYPE_VOID:
1337 assert(0);
1338 bld_store = NULL;
1339 break;
1340 }
1341
1342 /* If the destination is untyped then the source can be anything,
1343 * but LLVM won't like if the types don't match so lets cast
1344 * to the correct destination type as expected by LLVM. */
1345 if (dtype == TGSI_TYPE_UNTYPED &&
1346 !lp_check_vec_type(bld_store->type, LLVMTypeOf(value))) {
1347 value = LLVMBuildBitCast(builder, value, bld_store->vec_type,
1348 "src_casted");
1349 }
1350
1351 switch( inst->Instruction.Saturate ) {
1352 case TGSI_SAT_NONE:
1353 break;
1354
1355 case TGSI_SAT_ZERO_ONE:
1356 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
1357 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
1358 break;
1359
1360 case TGSI_SAT_MINUS_PLUS_ONE:
1361 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
1362 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
1363 break;
1364
1365 default:
1366 assert(0);
1367 }
1368
1369 if (reg->Register.Indirect) {
1370 indirect_index = get_indirect_index(bld,
1371 reg->Register.File,
1372 reg->Register.Index,
1373 &reg->Indirect);
1374 } else {
1375 assert(reg->Register.Index <=
1376 bld->bld_base.info->file_max[reg->Register.File]);
1377 }
1378
1379 switch( reg->Register.File ) {
1380 case TGSI_FILE_OUTPUT:
1381 if (reg->Register.Indirect) {
1382 LLVMValueRef chan_vec =
1383 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1384 LLVMValueRef length_vec =
1385 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
1386 LLVMValueRef index_vec; /* indexes into the temp registers */
1387 LLVMValueRef outputs_array;
1388 LLVMValueRef pixel_offsets;
1389 LLVMTypeRef float_ptr_type;
1390 int i;
1391
1392 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1393 pixel_offsets = uint_bld->undef;
1394 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1395 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1396 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1397 ii, ii, "");
1398 }
1399
1400 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1401 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1402 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1403 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1404 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1405
1406 float_ptr_type =
1407 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1408 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
1409 float_ptr_type, "");
1410
1411 /* Scatter store values into temp registers */
1412 emit_mask_scatter(bld, outputs_array, index_vec, value,
1413 &bld->exec_mask, pred);
1414 }
1415 else {
1416 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1417 chan_index);
1418 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
1419 }
1420 break;
1421
1422 case TGSI_FILE_TEMPORARY:
1423 if (reg->Register.Indirect) {
1424 LLVMValueRef chan_vec =
1425 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1426 LLVMValueRef length_vec =
1427 lp_build_const_int_vec(gallivm, uint_bld->type,
1428 bld->bld_base.base.type.length);
1429 LLVMValueRef index_vec; /* indexes into the temp registers */
1430 LLVMValueRef temps_array;
1431 LLVMValueRef pixel_offsets;
1432 LLVMTypeRef float_ptr_type;
1433 int i;
1434
1435 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1436 pixel_offsets = uint_bld->undef;
1437 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1438 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1439 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1440 ii, ii, "");
1441 }
1442
1443 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1444 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1445 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1446 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1447 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1448
1449 float_ptr_type =
1450 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1451 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1452 float_ptr_type, "");
1453
1454 /* Scatter store values into temp registers */
1455 emit_mask_scatter(bld, temps_array, index_vec, value,
1456 &bld->exec_mask, pred);
1457 }
1458 else {
1459 LLVMValueRef temp_ptr;
1460
1461 switch (dtype) {
1462 case TGSI_TYPE_UNSIGNED:
1463 case TGSI_TYPE_SIGNED: {
1464 LLVMTypeRef itype = bld_base->int_bld.vec_type;
1465 LLVMTypeRef ivtype = LLVMPointerType(itype, 0);
1466 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1467 chan_index);
1468 LLVMValueRef temp_value_ptr;
1469
1470 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, "");
1471 temp_value_ptr = LLVMBuildBitCast(builder, value, itype, "");
1472 value = temp_value_ptr;
1473 break;
1474 }
1475 default:
1476 case TGSI_TYPE_FLOAT:
1477 case TGSI_TYPE_UNTYPED:
1478 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1479 chan_index);
1480 break;
1481 }
1482
1483 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr);
1484 }
1485 break;
1486
1487 case TGSI_FILE_ADDRESS:
1488 assert(dtype == TGSI_TYPE_SIGNED);
1489 assert(LLVMTypeOf(value) == bld_base->base.int_vec_type);
1490 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1491 bld->addr[reg->Register.Index][chan_index]);
1492 break;
1493
1494 case TGSI_FILE_PREDICATE:
1495 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1496 bld->preds[reg->Register.Index][chan_index]);
1497 break;
1498
1499 default:
1500 assert( 0 );
1501 }
1502 }
1503
1504 static void
1505 emit_store(
1506 struct lp_build_tgsi_context * bld_base,
1507 const struct tgsi_full_instruction * inst,
1508 const struct tgsi_opcode_info * info,
1509 LLVMValueRef dst[4])
1510
1511 {
1512 unsigned chan_index;
1513 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1514
1515 if(info->num_dst) {
1516 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1517
1518 emit_fetch_predicate( bld, inst, pred );
1519
1520 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1521 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1522 }
1523 }
1524 }
1525
1526 /**
1527 * High-level instruction translators.
1528 */
1529
1530 static void
1531 emit_tex( struct lp_build_tgsi_soa_context *bld,
1532 const struct tgsi_full_instruction *inst,
1533 enum lp_build_tex_modifier modifier,
1534 LLVMValueRef *texel)
1535 {
1536 unsigned unit;
1537 LLVMValueRef lod_bias, explicit_lod;
1538 LLVMValueRef oow = NULL;
1539 LLVMValueRef coords[4];
1540 LLVMValueRef offsets[3] = { NULL };
1541 struct lp_derivatives derivs;
1542 struct lp_derivatives *deriv_ptr = NULL;
1543 unsigned num_coords, num_derivs, num_offsets;
1544 unsigned i;
1545
1546 if (!bld->sampler) {
1547 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1548 for (i = 0; i < 4; i++) {
1549 texel[i] = bld->bld_base.base.undef;
1550 }
1551 return;
1552 }
1553
1554 switch (inst->Texture.Texture) {
1555 case TGSI_TEXTURE_1D:
1556 num_coords = 1;
1557 num_offsets = 1;
1558 num_derivs = 1;
1559 break;
1560 case TGSI_TEXTURE_1D_ARRAY:
1561 num_coords = 2;
1562 num_offsets = 1;
1563 num_derivs = 1;
1564 break;
1565 case TGSI_TEXTURE_2D:
1566 case TGSI_TEXTURE_RECT:
1567 num_coords = 2;
1568 num_offsets = 2;
1569 num_derivs = 2;
1570 break;
1571 case TGSI_TEXTURE_SHADOW1D:
1572 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1573 num_coords = 3;
1574 num_offsets = 1;
1575 num_derivs = 1;
1576 break;
1577 case TGSI_TEXTURE_SHADOW2D:
1578 case TGSI_TEXTURE_SHADOWRECT:
1579 case TGSI_TEXTURE_2D_ARRAY:
1580 num_coords = 3;
1581 num_offsets = 2;
1582 num_derivs = 2;
1583 break;
1584 case TGSI_TEXTURE_CUBE:
1585 num_coords = 3;
1586 num_offsets = 2;
1587 num_derivs = 3;
1588 break;
1589 case TGSI_TEXTURE_3D:
1590 num_coords = 3;
1591 num_offsets = 3;
1592 num_derivs = 3;
1593 break;
1594 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1595 num_coords = 4;
1596 num_offsets = 2;
1597 num_derivs = 2;
1598 break;
1599 case TGSI_TEXTURE_SHADOWCUBE:
1600 num_coords = 4;
1601 num_offsets = 2;
1602 num_derivs = 3;
1603 break;
1604 default:
1605 assert(0);
1606 return;
1607 }
1608
1609 /* Note lod and especially projected are illegal in a LOT of cases */
1610 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1611 assert(num_coords < 4);
1612 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1613 explicit_lod = NULL;
1614 }
1615 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1616 assert(num_coords < 4);
1617 lod_bias = NULL;
1618 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1619 }
1620 else {
1621 lod_bias = NULL;
1622 explicit_lod = NULL;
1623 }
1624
1625 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1626 assert(num_coords < 4);
1627 oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1628 oow = lp_build_rcp(&bld->bld_base.base, oow);
1629 }
1630
1631 for (i = 0; i < num_coords; i++) {
1632 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1633 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1634 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1635 }
1636 for (i = num_coords; i < 4; i++) {
1637 coords[i] = bld->bld_base.base.undef;
1638 }
1639
1640 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1641 unsigned dim;
1642 for (dim = 0; dim < num_derivs; ++dim) {
1643 derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
1644 derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
1645 }
1646 deriv_ptr = &derivs;
1647 unit = inst->Src[3].Register.Index;
1648 } else {
1649 unit = inst->Src[1].Register.Index;
1650 }
1651
1652 /* some advanced gather instructions (txgo) would require 4 offsets */
1653 if (inst->Texture.NumOffsets == 1) {
1654 unsigned dim;
1655 for (dim = 0; dim < num_offsets; dim++) {
1656 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1657 }
1658 }
1659
1660 bld->sampler->emit_fetch_texel(bld->sampler,
1661 bld->bld_base.base.gallivm,
1662 bld->bld_base.base.type,
1663 FALSE,
1664 unit, unit,
1665 coords,
1666 offsets,
1667 deriv_ptr,
1668 lod_bias, explicit_lod,
1669 texel);
1670 }
1671
1672 static void
1673 emit_sample(struct lp_build_tgsi_soa_context *bld,
1674 const struct tgsi_full_instruction *inst,
1675 enum lp_build_tex_modifier modifier,
1676 boolean compare,
1677 LLVMValueRef *texel)
1678 {
1679 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1680 unsigned texture_unit, sampler_unit;
1681 LLVMValueRef lod_bias, explicit_lod;
1682 LLVMValueRef coords[4];
1683 LLVMValueRef offsets[3] = { NULL };
1684 struct lp_derivatives derivs;
1685 struct lp_derivatives *deriv_ptr = NULL;
1686 unsigned num_coords, num_offsets, num_derivs;
1687 unsigned i;
1688
1689 if (!bld->sampler) {
1690 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1691 for (i = 0; i < 4; i++) {
1692 texel[i] = bld->bld_base.base.undef;
1693 }
1694 return;
1695 }
1696
1697 /*
1698 * unlike old-style tex opcodes the texture/sampler indices
1699 * always come from src1 and src2 respectively.
1700 */
1701 texture_unit = inst->Src[1].Register.Index;
1702 sampler_unit = inst->Src[2].Register.Index;
1703
1704 /*
1705 * Note inst->Texture.Texture will contain the number of offsets,
1706 * however the target information is NOT there and comes from the
1707 * declared sampler views instead.
1708 */
1709 switch (bld->sv[texture_unit].Resource) {
1710 case TGSI_TEXTURE_1D:
1711 num_coords = 1;
1712 num_offsets = 1;
1713 num_derivs = 1;
1714 break;
1715 case TGSI_TEXTURE_1D_ARRAY:
1716 num_coords = 2;
1717 num_offsets = 1;
1718 num_derivs = 1;
1719 break;
1720 case TGSI_TEXTURE_2D:
1721 case TGSI_TEXTURE_RECT:
1722 num_coords = 2;
1723 num_offsets = 2;
1724 num_derivs = 2;
1725 break;
1726 case TGSI_TEXTURE_2D_ARRAY:
1727 num_coords = 3;
1728 num_offsets = 2;
1729 num_derivs = 2;
1730 break;
1731 case TGSI_TEXTURE_CUBE:
1732 num_coords = 3;
1733 num_offsets = 2;
1734 num_derivs = 3;
1735 break;
1736 case TGSI_TEXTURE_3D:
1737 num_coords = 3;
1738 num_offsets = 3;
1739 num_derivs = 3;
1740 break;
1741 case TGSI_TEXTURE_CUBE_ARRAY:
1742 num_coords = 4;
1743 num_offsets = 2;
1744 num_derivs = 3;
1745 break;
1746 default:
1747 assert(0);
1748 return;
1749 }
1750
1751 /*
1752 * unlike old-style tex opcodes the texture/sampler indices
1753 * always come from src1 and src2 respectively.
1754 */
1755 texture_unit = inst->Src[1].Register.Index;
1756 sampler_unit = inst->Src[2].Register.Index;
1757
1758 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1759 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1760 explicit_lod = NULL;
1761 }
1762 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1763 lod_bias = NULL;
1764 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1765 }
1766 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
1767 lod_bias = NULL;
1768 /* XXX might be better to explicitly pass the level zero information */
1769 explicit_lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
1770 }
1771 else {
1772 lod_bias = NULL;
1773 explicit_lod = NULL;
1774 }
1775
1776 for (i = 0; i < num_coords; i++) {
1777 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1778 }
1779 for (i = num_coords; i < 4; i++) {
1780 coords[i] = bld->bld_base.base.undef;
1781 }
1782 /*
1783 * XXX: whack shadow comparison value into place.
1784 * Should probably fix the interface for separate value
1785 * (it will not work for cube arrays if it is part of coords).
1786 */
1787 if (compare) {
1788 unsigned c_coord = num_coords > 2 ? 3 : 2;
1789 assert(num_coords < 4);
1790 coords[c_coord] = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1791 }
1792
1793 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1794 unsigned dim;
1795 for (dim = 0; dim < num_derivs; ++dim) {
1796 derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
1797 derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
1798 }
1799 deriv_ptr = &derivs;
1800 }
1801
1802 /* some advanced gather instructions (txgo) would require 4 offsets */
1803 if (inst->Texture.NumOffsets == 1) {
1804 unsigned dim;
1805 for (dim = 0; dim < num_offsets; dim++) {
1806 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1807 }
1808 }
1809
1810 bld->sampler->emit_fetch_texel(bld->sampler,
1811 bld->bld_base.base.gallivm,
1812 bld->bld_base.base.type,
1813 FALSE,
1814 texture_unit, sampler_unit,
1815 coords,
1816 offsets,
1817 deriv_ptr,
1818 lod_bias, explicit_lod,
1819 texel);
1820 }
1821
1822 static void
1823 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
1824 const struct tgsi_full_instruction *inst,
1825 LLVMValueRef *texel,
1826 boolean is_samplei)
1827 {
1828 unsigned unit, target;
1829 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
1830 LLVMValueRef explicit_lod = NULL;
1831 LLVMValueRef coords[3];
1832 LLVMValueRef offsets[3] = { NULL };
1833 unsigned num_coords;
1834 unsigned dims;
1835 unsigned i;
1836
1837 if (!bld->sampler) {
1838 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1839 for (i = 0; i < 4; i++) {
1840 texel[i] = coord_undef;
1841 }
1842 return;
1843 }
1844
1845 unit = inst->Src[1].Register.Index;
1846
1847 if (is_samplei) {
1848 target = bld->sv[unit].Resource;
1849 }
1850 else {
1851 target = inst->Texture.Texture;
1852 }
1853
1854 switch (target) {
1855 case TGSI_TEXTURE_1D:
1856 case TGSI_TEXTURE_BUFFER:
1857 num_coords = 1;
1858 dims = 1;
1859 break;
1860 case TGSI_TEXTURE_1D_ARRAY:
1861 num_coords = 2;
1862 dims = 1;
1863 break;
1864 case TGSI_TEXTURE_2D:
1865 case TGSI_TEXTURE_RECT:
1866 num_coords = 2;
1867 dims = 2;
1868 break;
1869 case TGSI_TEXTURE_2D_ARRAY:
1870 num_coords = 3;
1871 dims = 2;
1872 break;
1873 case TGSI_TEXTURE_3D:
1874 num_coords = 3;
1875 dims = 3;
1876 break;
1877 default:
1878 assert(0);
1879 return;
1880 }
1881
1882 /* always have lod except for buffers ? */
1883 if (target != TGSI_TEXTURE_BUFFER) {
1884 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1885 }
1886
1887 for (i = 0; i < num_coords; i++) {
1888 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1889 }
1890 for (i = num_coords; i < 3; i++) {
1891 coords[i] = coord_undef;
1892 }
1893
1894 if (inst->Texture.NumOffsets == 1) {
1895 unsigned dim;
1896 for (dim = 0; dim < dims; dim++) {
1897 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1898 }
1899 }
1900
1901 bld->sampler->emit_fetch_texel(bld->sampler,
1902 bld->bld_base.base.gallivm,
1903 bld->bld_base.base.type,
1904 TRUE,
1905 unit, unit,
1906 coords,
1907 offsets,
1908 NULL,
1909 NULL, explicit_lod,
1910 texel);
1911 }
1912
1913 static void
1914 emit_size_query( struct lp_build_tgsi_soa_context *bld,
1915 const struct tgsi_full_instruction *inst,
1916 LLVMValueRef *sizes_out,
1917 boolean is_sviewinfo)
1918 {
1919 LLVMValueRef explicit_lod;
1920 unsigned has_lod;
1921 unsigned i;
1922 unsigned unit = inst->Src[1].Register.Index;
1923 unsigned target;
1924
1925 if (is_sviewinfo) {
1926 target = bld->sv[unit].Resource;
1927 }
1928 else {
1929 target = inst->Texture.Texture;
1930 }
1931 switch (target) {
1932 case TGSI_TEXTURE_BUFFER:
1933 case TGSI_TEXTURE_RECT:
1934 case TGSI_TEXTURE_SHADOWRECT:
1935 has_lod = 0;
1936 break;
1937 default:
1938 has_lod = 1;
1939 break;
1940 }
1941
1942 if (!bld->sampler) {
1943 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
1944 for (i = 0; i < 4; i++)
1945 sizes_out[i] = bld->bld_base.int_bld.undef;
1946 return;
1947 }
1948
1949 if (has_lod)
1950 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 0 );
1951 else
1952 explicit_lod = NULL;
1953
1954 bld->sampler->emit_size_query(bld->sampler,
1955 bld->bld_base.base.gallivm,
1956 bld->bld_base.int_bld.type,
1957 unit,
1958 is_sviewinfo,
1959 explicit_lod,
1960 sizes_out);
1961 }
1962
1963 static boolean
1964 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1965 int pc)
1966 {
1967 int i;
1968
1969 for (i = 0; i < 5; i++) {
1970 unsigned opcode;
1971
1972 if (pc + i >= bld->bld_base.info->num_instructions)
1973 return TRUE;
1974
1975 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
1976
1977 if (opcode == TGSI_OPCODE_END)
1978 return TRUE;
1979
1980 if (opcode == TGSI_OPCODE_TEX ||
1981 opcode == TGSI_OPCODE_TXP ||
1982 opcode == TGSI_OPCODE_TXD ||
1983 opcode == TGSI_OPCODE_TXB ||
1984 opcode == TGSI_OPCODE_TXL ||
1985 opcode == TGSI_OPCODE_TXF ||
1986 opcode == TGSI_OPCODE_TXQ ||
1987 opcode == TGSI_OPCODE_CAL ||
1988 opcode == TGSI_OPCODE_CALLNZ ||
1989 opcode == TGSI_OPCODE_IF ||
1990 opcode == TGSI_OPCODE_UIF ||
1991 opcode == TGSI_OPCODE_BGNLOOP ||
1992 opcode == TGSI_OPCODE_SWITCH)
1993 return FALSE;
1994 }
1995
1996 return TRUE;
1997 }
1998
1999
2000
2001 /**
2002 * Kill fragment if any of the src register values are negative.
2003 */
2004 static void
2005 emit_kil(
2006 struct lp_build_tgsi_soa_context *bld,
2007 const struct tgsi_full_instruction *inst,
2008 int pc)
2009 {
2010 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2011 const struct tgsi_full_src_register *reg = &inst->Src[0];
2012 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2013 LLVMValueRef mask;
2014 unsigned chan_index;
2015
2016 memset(&terms, 0, sizeof terms);
2017
2018 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2019 unsigned swizzle;
2020
2021 /* Unswizzle channel */
2022 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2023
2024 /* Check if the component has not been already tested. */
2025 assert(swizzle < TGSI_NUM_CHANNELS);
2026 if( !terms[swizzle] )
2027 /* TODO: change the comparison operator instead of setting the sign */
2028 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2029 }
2030
2031 mask = NULL;
2032 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2033 if(terms[chan_index]) {
2034 LLVMValueRef chan_mask;
2035
2036 /*
2037 * If term < 0 then mask = 0 else mask = ~0.
2038 */
2039 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2040
2041 if(mask)
2042 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2043 else
2044 mask = chan_mask;
2045 }
2046 }
2047
2048 if(mask) {
2049 lp_build_mask_update(bld->mask, mask);
2050
2051 if (!near_end_of_shader(bld, pc))
2052 lp_build_mask_check(bld->mask);
2053 }
2054 }
2055
2056
2057 /**
2058 * Predicated fragment kill.
2059 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
2060 * The only predication is the execution mask which will apply if
2061 * we're inside a loop or conditional.
2062 */
2063 static void
2064 emit_kilp(struct lp_build_tgsi_soa_context *bld,
2065 int pc)
2066 {
2067 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2068 LLVMValueRef mask;
2069
2070 /* For those channels which are "alive", disable fragment shader
2071 * execution.
2072 */
2073 if (bld->exec_mask.has_mask) {
2074 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2075 }
2076 else {
2077 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2078 mask = zero;
2079 }
2080
2081 lp_build_mask_update(bld->mask, mask);
2082
2083 if (!near_end_of_shader(bld, pc))
2084 lp_build_mask_check(bld->mask);
2085 }
2086
2087
2088 /**
2089 * Emit code which will dump the value of all the temporary registers
2090 * to stdout.
2091 */
2092 static void
2093 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
2094 {
2095 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2096 LLVMBuilderRef builder = gallivm->builder;
2097 LLVMValueRef temp_ptr;
2098 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
2099 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
2100 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
2101 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
2102 int index;
2103 int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
2104
2105 for (index = 0; index < n; index++) {
2106 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
2107 LLVMValueRef v[4][4], res;
2108 int chan;
2109
2110 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
2111
2112 for (chan = 0; chan < 4; chan++) {
2113 temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2114 res = LLVMBuildLoad(builder, temp_ptr, "");
2115 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
2116 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
2117 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
2118 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
2119 }
2120
2121 lp_build_printf(gallivm, " X: %f %f %f %f\n",
2122 v[0][0], v[0][1], v[0][2], v[0][3]);
2123 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
2124 v[1][0], v[1][1], v[1][2], v[1][3]);
2125 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
2126 v[2][0], v[2][1], v[2][2], v[2][3]);
2127 lp_build_printf(gallivm, " W: %f %f %f %f\n",
2128 v[3][0], v[3][1], v[3][2], v[3][3]);
2129 }
2130 }
2131
2132
2133
2134 void
2135 lp_emit_declaration_soa(
2136 struct lp_build_tgsi_context *bld_base,
2137 const struct tgsi_full_declaration *decl)
2138 {
2139 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2140 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2141 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2142 const unsigned first = decl->Range.First;
2143 const unsigned last = decl->Range.Last;
2144 unsigned idx, i;
2145
2146 for (idx = first; idx <= last; ++idx) {
2147 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2148 switch (decl->Declaration.File) {
2149 case TGSI_FILE_TEMPORARY:
2150 assert(idx < LP_MAX_TGSI_TEMPS);
2151 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2152 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2153 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2154 }
2155 break;
2156
2157 case TGSI_FILE_OUTPUT:
2158 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2159 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2160 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2161 vec_type, "output");
2162 }
2163 break;
2164
2165 case TGSI_FILE_ADDRESS:
2166 /* ADDR registers are only allocated with an integer LLVM IR type,
2167 * as they are guaranteed to always have integers.
2168 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2169 * an ADDR register for that matter).
2170 */
2171 assert(idx < LP_MAX_TGSI_ADDRS);
2172 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2173 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2174 break;
2175
2176 case TGSI_FILE_PREDICATE:
2177 assert(idx < LP_MAX_TGSI_PREDS);
2178 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2179 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
2180 "predicate");
2181 break;
2182
2183 case TGSI_FILE_SAMPLER_VIEW:
2184 /*
2185 * The target stored here MUST match whatever there actually
2186 * is in the set sampler views (what about return type?).
2187 */
2188 assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2189 bld->sv[idx] = decl->SamplerView;
2190 break;
2191
2192 default:
2193 /* don't need to declare other vars */
2194 break;
2195 }
2196 }
2197 }
2198
2199
2200 void lp_emit_immediate_soa(
2201 struct lp_build_tgsi_context *bld_base,
2202 const struct tgsi_full_immediate *imm)
2203 {
2204 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2205 struct gallivm_state * gallivm = bld_base->base.gallivm;
2206
2207 /* simply copy the immediate values into the next immediates[] slot */
2208 unsigned i;
2209 const uint size = imm->Immediate.NrTokens - 1;
2210 assert(size <= 4);
2211 assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
2212 switch (imm->Immediate.DataType) {
2213 case TGSI_IMM_FLOAT32:
2214 for( i = 0; i < size; ++i )
2215 bld->immediates[bld->num_immediates][i] =
2216 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2217
2218 break;
2219 case TGSI_IMM_UINT32:
2220 for( i = 0; i < size; ++i ) {
2221 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2222 bld->immediates[bld->num_immediates][i] =
2223 LLVMConstBitCast(tmp, bld_base->base.vec_type);
2224 }
2225
2226 break;
2227 case TGSI_IMM_INT32:
2228 for( i = 0; i < size; ++i ) {
2229 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2230 bld->immediates[bld->num_immediates][i] =
2231 LLVMConstBitCast(tmp, bld_base->base.vec_type);
2232 }
2233
2234 break;
2235 }
2236 for( i = size; i < 4; ++i )
2237 bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
2238
2239 bld->num_immediates++;
2240 }
2241
2242 static void
2243 ddx_emit(
2244 const struct lp_build_tgsi_action * action,
2245 struct lp_build_tgsi_context * bld_base,
2246 struct lp_build_emit_data * emit_data)
2247 {
2248 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2249
2250 emit_fetch_deriv(bld, emit_data->args[0], NULL,
2251 &emit_data->output[emit_data->chan], NULL);
2252 }
2253
2254 static void
2255 ddy_emit(
2256 const struct lp_build_tgsi_action * action,
2257 struct lp_build_tgsi_context * bld_base,
2258 struct lp_build_emit_data * emit_data)
2259 {
2260 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2261
2262 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
2263 &emit_data->output[emit_data->chan]);
2264 }
2265
2266 static void
2267 kilp_emit(
2268 const struct lp_build_tgsi_action * action,
2269 struct lp_build_tgsi_context * bld_base,
2270 struct lp_build_emit_data * emit_data)
2271 {
2272 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2273
2274 emit_kilp(bld, bld_base->pc - 1);
2275 }
2276
2277 static void
2278 kil_emit(
2279 const struct lp_build_tgsi_action * action,
2280 struct lp_build_tgsi_context * bld_base,
2281 struct lp_build_emit_data * emit_data)
2282 {
2283 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2284
2285 emit_kil(bld, emit_data->inst, bld_base->pc - 1);
2286 }
2287
2288 static void
2289 tex_emit(
2290 const struct lp_build_tgsi_action * action,
2291 struct lp_build_tgsi_context * bld_base,
2292 struct lp_build_emit_data * emit_data)
2293 {
2294 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2295
2296 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
2297 }
2298
2299 static void
2300 txb_emit(
2301 const struct lp_build_tgsi_action * action,
2302 struct lp_build_tgsi_context * bld_base,
2303 struct lp_build_emit_data * emit_data)
2304 {
2305 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2306
2307 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2308 emit_data->output);
2309 }
2310
2311 static void
2312 txd_emit(
2313 const struct lp_build_tgsi_action * action,
2314 struct lp_build_tgsi_context * bld_base,
2315 struct lp_build_emit_data * emit_data)
2316 {
2317 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2318
2319 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2320 emit_data->output);
2321 }
2322
2323 static void
2324 txl_emit(
2325 const struct lp_build_tgsi_action * action,
2326 struct lp_build_tgsi_context * bld_base,
2327 struct lp_build_emit_data * emit_data)
2328 {
2329 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2330
2331 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2332 emit_data->output);
2333 }
2334
2335 static void
2336 txp_emit(
2337 const struct lp_build_tgsi_action * action,
2338 struct lp_build_tgsi_context * bld_base,
2339 struct lp_build_emit_data * emit_data)
2340 {
2341 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2342
2343 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
2344 emit_data->output);
2345 }
2346
2347 static void
2348 txq_emit(
2349 const struct lp_build_tgsi_action * action,
2350 struct lp_build_tgsi_context * bld_base,
2351 struct lp_build_emit_data * emit_data)
2352 {
2353 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2354
2355 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
2356 }
2357
2358 static void
2359 txf_emit(
2360 const struct lp_build_tgsi_action * action,
2361 struct lp_build_tgsi_context * bld_base,
2362 struct lp_build_emit_data * emit_data)
2363 {
2364 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2365
2366 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
2367 }
2368
2369 static void
2370 sample_i_emit(
2371 const struct lp_build_tgsi_action * action,
2372 struct lp_build_tgsi_context * bld_base,
2373 struct lp_build_emit_data * emit_data)
2374 {
2375 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2376
2377 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
2378 }
2379
2380 static void
2381 sample_emit(
2382 const struct lp_build_tgsi_action * action,
2383 struct lp_build_tgsi_context * bld_base,
2384 struct lp_build_emit_data * emit_data)
2385 {
2386 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2387
2388 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2389 FALSE, emit_data->output);
2390 }
2391
2392 static void
2393 sample_b_emit(
2394 const struct lp_build_tgsi_action * action,
2395 struct lp_build_tgsi_context * bld_base,
2396 struct lp_build_emit_data * emit_data)
2397 {
2398 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2399
2400 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2401 FALSE, emit_data->output);
2402 }
2403
2404 static void
2405 sample_c_emit(
2406 const struct lp_build_tgsi_action * action,
2407 struct lp_build_tgsi_context * bld_base,
2408 struct lp_build_emit_data * emit_data)
2409 {
2410 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2411
2412 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2413 TRUE, emit_data->output);
2414 }
2415
2416 static void
2417 sample_c_lz_emit(
2418 const struct lp_build_tgsi_action * action,
2419 struct lp_build_tgsi_context * bld_base,
2420 struct lp_build_emit_data * emit_data)
2421 {
2422 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2423
2424 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
2425 TRUE, emit_data->output);
2426 }
2427
2428 static void
2429 sample_d_emit(
2430 const struct lp_build_tgsi_action * action,
2431 struct lp_build_tgsi_context * bld_base,
2432 struct lp_build_emit_data * emit_data)
2433 {
2434 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2435
2436 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2437 FALSE, emit_data->output);
2438 }
2439
2440 static void
2441 sample_l_emit(
2442 const struct lp_build_tgsi_action * action,
2443 struct lp_build_tgsi_context * bld_base,
2444 struct lp_build_emit_data * emit_data)
2445 {
2446 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2447
2448 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2449 FALSE, emit_data->output);
2450 }
2451
2452 static void
2453 sviewinfo_emit(
2454 const struct lp_build_tgsi_action * action,
2455 struct lp_build_tgsi_context * bld_base,
2456 struct lp_build_emit_data * emit_data)
2457 {
2458 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2459
2460 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
2461 }
2462
2463 static LLVMValueRef
2464 mask_to_one_vec(struct lp_build_tgsi_context *bld_base)
2465 {
2466 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2467 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2468 LLVMValueRef one_vec = bld_base->int_bld.one;
2469 struct lp_exec_mask *exec_mask = &bld->exec_mask;
2470
2471 if (exec_mask->has_mask) {
2472 one_vec = LLVMBuildAnd(builder, one_vec, exec_mask->exec_mask, "");
2473 }
2474 one_vec = LLVMBuildAnd(builder, one_vec,
2475 lp_build_mask_value(bld->mask), "");
2476 return one_vec;
2477 }
2478
2479 static void
2480 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
2481 LLVMValueRef ptr,
2482 LLVMValueRef mask)
2483 {
2484 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2485
2486 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2487
2488 current_vec = LLVMBuildAdd(builder, current_vec, mask, "");
2489
2490 LLVMBuildStore(builder, current_vec, ptr);
2491 }
2492
2493 static void
2494 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
2495 LLVMValueRef ptr,
2496 LLVMValueRef mask)
2497 {
2498 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2499
2500 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2501 LLVMValueRef full_mask = lp_build_cmp(&bld_base->uint_bld,
2502 PIPE_FUNC_NOTEQUAL,
2503 mask,
2504 bld_base->uint_bld.zero);
2505
2506 current_vec = lp_build_select(&bld_base->uint_bld,
2507 full_mask,
2508 bld_base->uint_bld.zero,
2509 current_vec);
2510
2511 LLVMBuildStore(builder, current_vec, ptr);
2512 }
2513
2514 static LLVMValueRef
2515 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
2516 LLVMValueRef current_mask_vec,
2517 LLVMValueRef total_emitted_vertices_vec)
2518 {
2519 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2520 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
2521 LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS,
2522 total_emitted_vertices_vec,
2523 bld->max_output_vertices_vec);
2524
2525 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
2526 }
2527
2528 static void
2529 emit_vertex(
2530 const struct lp_build_tgsi_action * action,
2531 struct lp_build_tgsi_context * bld_base,
2532 struct lp_build_emit_data * emit_data)
2533 {
2534 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2535 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2536
2537 if (bld->gs_iface->emit_vertex) {
2538 LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
2539 LLVMValueRef total_emitted_vertices_vec =
2540 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
2541 masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
2542 total_emitted_vertices_vec);
2543 gather_outputs(bld);
2544 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
2545 bld->outputs,
2546 total_emitted_vertices_vec);
2547 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
2548 masked_ones);
2549 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
2550 masked_ones);
2551 #if DUMP_GS_EMITS
2552 lp_build_print_value(bld->bld_base.base.gallivm,
2553 " +++ emit vertex masked ones = ",
2554 masked_ones);
2555 lp_build_print_value(bld->bld_base.base.gallivm,
2556 " +++ emit vertex emitted = ",
2557 total_emitted_vertices_vec);
2558 #endif
2559 }
2560 }
2561
2562
2563 static void
2564 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
2565 LLVMValueRef masked_ones)
2566 {
2567 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2568 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2569
2570 if (bld->gs_iface->end_primitive) {
2571 LLVMValueRef emitted_vertices_vec =
2572 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
2573 LLVMValueRef emitted_prims_vec =
2574 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
2575
2576 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
2577 emitted_vertices_vec,
2578 emitted_prims_vec);
2579
2580 #if DUMP_GS_EMITS
2581 lp_build_print_value(bld->bld_base.base.gallivm,
2582 " +++ end prim masked ones = ",
2583 masked_ones);
2584 lp_build_print_value(bld->bld_base.base.gallivm,
2585 " +++ end prim emitted verts1 = ",
2586 emitted_vertices_vec);
2587 lp_build_print_value(bld->bld_base.base.gallivm,
2588 " +++ end prim emitted prims1 = ",
2589 LLVMBuildLoad(builder,
2590 bld->emitted_prims_vec_ptr, ""));
2591 #endif
2592 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
2593 masked_ones);
2594 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
2595 masked_ones);
2596 #if DUMP_GS_EMITS
2597 lp_build_print_value(bld->bld_base.base.gallivm,
2598 " +++ end prim emitted verts2 = ",
2599 LLVMBuildLoad(builder,
2600 bld->emitted_vertices_vec_ptr, ""));
2601 #endif
2602 }
2603
2604 }
2605
2606 static void
2607 end_primitive(
2608 const struct lp_build_tgsi_action * action,
2609 struct lp_build_tgsi_context * bld_base,
2610 struct lp_build_emit_data * emit_data)
2611 {
2612 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2613
2614 if (bld->gs_iface->end_primitive) {
2615 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2616 LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
2617 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2618 LLVMValueRef emitted_verts = LLVMBuildLoad(
2619 builder, bld->emitted_vertices_vec_ptr, "");
2620 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2621 emitted_verts,
2622 uint_bld->zero);
2623 /* We need to combine the current execution mask with the mask
2624 telling us which, if any, execution slots actually have
2625 unemitted primitives, this way we make sure that end_primitives
2626 executes only on the paths that have unflushed vertices */
2627 masked_ones = LLVMBuildAnd(builder, masked_ones, emitted_mask, "");
2628
2629 end_primitive_masked(bld_base, masked_ones);
2630 }
2631 }
2632
2633 static void
2634 cal_emit(
2635 const struct lp_build_tgsi_action * action,
2636 struct lp_build_tgsi_context * bld_base,
2637 struct lp_build_emit_data * emit_data)
2638 {
2639 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2640
2641 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
2642 &bld_base->pc);
2643 }
2644
2645 static void
2646 ret_emit(
2647 const struct lp_build_tgsi_action * action,
2648 struct lp_build_tgsi_context * bld_base,
2649 struct lp_build_emit_data * emit_data)
2650 {
2651 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2652
2653 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
2654 }
2655
2656 static void
2657 brk_emit(
2658 const struct lp_build_tgsi_action * action,
2659 struct lp_build_tgsi_context * bld_base,
2660 struct lp_build_emit_data * emit_data)
2661 {
2662 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2663
2664 lp_exec_break(&bld->exec_mask, bld_base);
2665 }
2666
2667 static void
2668 breakc_emit(
2669 const struct lp_build_tgsi_action * action,
2670 struct lp_build_tgsi_context * bld_base,
2671 struct lp_build_emit_data * emit_data)
2672 {
2673 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2674 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2675 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2676 LLVMValueRef unsigned_cond =
2677 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
2678 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2679 unsigned_cond,
2680 uint_bld->zero);
2681
2682 lp_exec_break_condition(&bld->exec_mask, cond);
2683 }
2684
2685 static void
2686 if_emit(
2687 const struct lp_build_tgsi_action * action,
2688 struct lp_build_tgsi_context * bld_base,
2689 struct lp_build_emit_data * emit_data)
2690 {
2691 LLVMValueRef tmp;
2692 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2693
2694 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
2695 emit_data->args[0], bld->bld_base.base.zero);
2696 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
2697 }
2698
2699 static void
2700 uif_emit(
2701 const struct lp_build_tgsi_action * action,
2702 struct lp_build_tgsi_context * bld_base,
2703 struct lp_build_emit_data * emit_data)
2704 {
2705 LLVMValueRef tmp;
2706 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2707 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2708
2709 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2710 emit_data->args[0], uint_bld->zero);
2711 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
2712 }
2713
2714 static void
2715 case_emit(
2716 const struct lp_build_tgsi_action * action,
2717 struct lp_build_tgsi_context * bld_base,
2718 struct lp_build_emit_data * emit_data)
2719 {
2720 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2721
2722 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
2723 }
2724
2725 static void
2726 default_emit(
2727 const struct lp_build_tgsi_action * action,
2728 struct lp_build_tgsi_context * bld_base,
2729 struct lp_build_emit_data * emit_data)
2730 {
2731 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2732
2733 lp_exec_default(&bld->exec_mask, bld_base);
2734 }
2735
2736 static void
2737 switch_emit(
2738 const struct lp_build_tgsi_action * action,
2739 struct lp_build_tgsi_context * bld_base,
2740 struct lp_build_emit_data * emit_data)
2741 {
2742 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2743
2744 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
2745 }
2746
2747 static void
2748 endswitch_emit(
2749 const struct lp_build_tgsi_action * action,
2750 struct lp_build_tgsi_context * bld_base,
2751 struct lp_build_emit_data * emit_data)
2752 {
2753 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2754
2755 lp_exec_endswitch(&bld->exec_mask, bld_base);
2756 }
2757
2758 static void
2759 bgnloop_emit(
2760 const struct lp_build_tgsi_action * action,
2761 struct lp_build_tgsi_context * bld_base,
2762 struct lp_build_emit_data * emit_data)
2763 {
2764 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2765
2766 lp_exec_bgnloop(&bld->exec_mask);
2767 }
2768
2769 static void
2770 bgnsub_emit(
2771 const struct lp_build_tgsi_action * action,
2772 struct lp_build_tgsi_context * bld_base,
2773 struct lp_build_emit_data * emit_data)
2774 {
2775 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2776
2777 lp_exec_mask_bgnsub(&bld->exec_mask);
2778 }
2779
2780 static void
2781 else_emit(
2782 const struct lp_build_tgsi_action * action,
2783 struct lp_build_tgsi_context * bld_base,
2784 struct lp_build_emit_data * emit_data)
2785 {
2786 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2787
2788 lp_exec_mask_cond_invert(&bld->exec_mask);
2789 }
2790
2791 static void
2792 endif_emit(
2793 const struct lp_build_tgsi_action * action,
2794 struct lp_build_tgsi_context * bld_base,
2795 struct lp_build_emit_data * emit_data)
2796 {
2797 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2798
2799 lp_exec_mask_cond_pop(&bld->exec_mask);
2800 }
2801
2802 static void
2803 endloop_emit(
2804 const struct lp_build_tgsi_action * action,
2805 struct lp_build_tgsi_context * bld_base,
2806 struct lp_build_emit_data * emit_data)
2807 {
2808 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2809
2810 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
2811 }
2812
2813 static void
2814 endsub_emit(
2815 const struct lp_build_tgsi_action * action,
2816 struct lp_build_tgsi_context * bld_base,
2817 struct lp_build_emit_data * emit_data)
2818 {
2819 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2820
2821 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
2822 }
2823
2824 static void
2825 cont_emit(
2826 const struct lp_build_tgsi_action * action,
2827 struct lp_build_tgsi_context * bld_base,
2828 struct lp_build_emit_data * emit_data)
2829 {
2830 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2831
2832 lp_exec_continue(&bld->exec_mask);
2833 }
2834
2835 /* XXX: Refactor and move it to lp_bld_tgsi_action.c
2836 *
2837 * XXX: What do the comments about xmm registers mean? Maybe they are left over
2838 * from old code, but there is no garauntee that LLVM will use those registers
2839 * for this code.
2840 *
2841 * XXX: There should be no calls to lp_build_emit_fetch in this function. This
2842 * should be handled by the emit_data->fetch_args function. */
2843 static void
2844 nrm_emit(
2845 const struct lp_build_tgsi_action * action,
2846 struct lp_build_tgsi_context * bld_base,
2847 struct lp_build_emit_data * emit_data)
2848 {
2849 LLVMValueRef tmp0, tmp1;
2850 LLVMValueRef tmp4 = NULL;
2851 LLVMValueRef tmp5 = NULL;
2852 LLVMValueRef tmp6 = NULL;
2853 LLVMValueRef tmp7 = NULL;
2854 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2855
2856 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2857
2858 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
2859 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
2860 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
2861 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
2862
2863 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2864
2865 /* xmm4 = src.x */
2866 /* xmm0 = src.x * src.x */
2867 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
2868 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2869 tmp4 = tmp0;
2870 }
2871 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
2872
2873 /* xmm5 = src.y */
2874 /* xmm0 = xmm0 + src.y * src.y */
2875 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
2876 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2877 tmp5 = tmp1;
2878 }
2879 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2880 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2881
2882 /* xmm6 = src.z */
2883 /* xmm0 = xmm0 + src.z * src.z */
2884 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
2885 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2886 tmp6 = tmp1;
2887 }
2888 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2889 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2890
2891 if (dims == 4) {
2892 /* xmm7 = src.w */
2893 /* xmm0 = xmm0 + src.w * src.w */
2894 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
2895 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
2896 tmp7 = tmp1;
2897 }
2898 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2899 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2900 }
2901 /* xmm1 = 1 / sqrt(xmm0) */
2902 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
2903 /* dst.x = xmm1 * src.x */
2904 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2905 emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
2906 }
2907 /* dst.y = xmm1 * src.y */
2908 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2909 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
2910 }
2911
2912 /* dst.z = xmm1 * src.z */
2913 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2914 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
2915 }
2916 /* dst.w = xmm1 * src.w */
2917 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
2918 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
2919 }
2920 }
2921
2922 /* dst.w = 1.0 */
2923 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
2924 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
2925 }
2926 }
2927
2928 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
2929 {
2930 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2931 struct gallivm_state * gallivm = bld_base->base.gallivm;
2932
2933 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2934 LLVMValueRef array_size =
2935 lp_build_const_int32(gallivm,
2936 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
2937 bld->temps_array = lp_build_array_alloca(gallivm,
2938 bld_base->base.vec_type, array_size,
2939 "temp_array");
2940 }
2941
2942 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2943 LLVMValueRef array_size =
2944 lp_build_const_int32(gallivm,
2945 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
2946 bld->outputs_array = lp_build_array_alloca(gallivm,
2947 bld_base->base.vec_type, array_size,
2948 "output_array");
2949 }
2950
2951 /* If we have indirect addressing in inputs we need to copy them into
2952 * our alloca array to be able to iterate over them */
2953 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
2954 unsigned index, chan;
2955 LLVMTypeRef vec_type = bld_base->base.vec_type;
2956 LLVMValueRef array_size = lp_build_const_int32(gallivm,
2957 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
2958 bld->inputs_array = lp_build_array_alloca(gallivm,
2959 vec_type, array_size,
2960 "input_array");
2961
2962 assert(bld_base->info->num_inputs
2963 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
2964
2965 for (index = 0; index < bld_base->info->num_inputs; ++index) {
2966 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2967 LLVMValueRef lindex =
2968 lp_build_const_int32(gallivm, index * 4 + chan);
2969 LLVMValueRef input_ptr =
2970 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
2971 &lindex, 1, "");
2972 LLVMValueRef value = bld->inputs[index][chan];
2973 if (value)
2974 LLVMBuildStore(gallivm->builder, value, input_ptr);
2975 }
2976 }
2977 }
2978
2979 if (bld->gs_iface) {
2980 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
2981 bld->emitted_prims_vec_ptr =
2982 lp_build_alloca(gallivm,
2983 uint_bld->vec_type,
2984 "emitted_prims_ptr");
2985 bld->emitted_vertices_vec_ptr =
2986 lp_build_alloca(gallivm,
2987 uint_bld->vec_type,
2988 "emitted_vertices_ptr");
2989 bld->total_emitted_vertices_vec_ptr =
2990 lp_build_alloca(gallivm,
2991 uint_bld->vec_type,
2992 "total_emitted_vertices_ptr");
2993
2994 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2995 bld->emitted_prims_vec_ptr);
2996 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2997 bld->emitted_vertices_vec_ptr);
2998 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2999 bld->total_emitted_vertices_vec_ptr);
3000 }
3001 }
3002
3003 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
3004 {
3005 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3006 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3007
3008 if (0) {
3009 /* for debugging */
3010 emit_dump_temps(bld);
3011 }
3012
3013 /* If we have indirect addressing in outputs we need to copy our alloca array
3014 * to the outputs slots specified by the caller */
3015 if (bld->gs_iface) {
3016 LLVMValueRef total_emitted_vertices_vec;
3017 LLVMValueRef emitted_prims_vec;
3018 /* implicit end_primitives, needed in case there are any unflushed
3019 vertices in the cache */
3020 end_primitive(NULL, bld_base, NULL);
3021
3022 total_emitted_vertices_vec =
3023 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3024 emitted_prims_vec =
3025 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3026
3027 bld->gs_iface->gs_epilogue(bld->gs_iface,
3028 &bld->bld_base,
3029 total_emitted_vertices_vec,
3030 emitted_prims_vec);
3031 } else {
3032 gather_outputs(bld);
3033 }
3034 }
3035
3036 void
3037 lp_build_tgsi_soa(struct gallivm_state *gallivm,
3038 const struct tgsi_token *tokens,
3039 struct lp_type type,
3040 struct lp_build_mask_context *mask,
3041 LLVMValueRef consts_ptr,
3042 const struct lp_bld_tgsi_system_values *system_values,
3043 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
3044 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
3045 struct lp_build_sampler_soa *sampler,
3046 const struct tgsi_shader_info *info,
3047 const struct lp_build_tgsi_gs_iface *gs_iface)
3048 {
3049 struct lp_build_tgsi_soa_context bld;
3050
3051 struct lp_type res_type;
3052
3053 assert(type.length <= LP_MAX_VECTOR_LENGTH);
3054 memset(&res_type, 0, sizeof res_type);
3055 res_type.width = type.width;
3056 res_type.length = type.length;
3057 res_type.sign = 1;
3058
3059 /* Setup build context */
3060 memset(&bld, 0, sizeof bld);
3061 lp_build_context_init(&bld.bld_base.base, gallivm, type);
3062 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
3063 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
3064 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
3065 bld.mask = mask;
3066 bld.inputs = inputs;
3067 bld.outputs = outputs;
3068 bld.consts_ptr = consts_ptr;
3069 bld.sampler = sampler;
3070 bld.bld_base.info = info;
3071 bld.indirect_files = info->indirect_files;
3072
3073 bld.bld_base.soa = TRUE;
3074 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
3075 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
3076 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
3077 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
3078 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
3079 bld.bld_base.emit_store = emit_store;
3080
3081 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
3082 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
3083
3084 bld.bld_base.emit_prologue = emit_prologue;
3085 bld.bld_base.emit_epilogue = emit_epilogue;
3086
3087 /* Set opcode actions */
3088 lp_set_default_actions_cpu(&bld.bld_base);
3089
3090 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
3091 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
3092 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
3093 bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
3094 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
3095 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
3096 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
3097 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
3098 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
3099 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
3100 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
3101 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
3102 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
3103 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
3104 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
3105 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
3106 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
3107 bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
3108 bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
3109 bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
3110 bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
3111 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
3112 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
3113 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
3114 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
3115 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
3116 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
3117 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
3118 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
3119 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
3120 /* DX10 sampling ops */
3121 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
3122 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
3123 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
3124 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
3125 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
3126 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
3127 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
3128 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
3129
3130 if (gs_iface) {
3131 /* There's no specific value for this because it should always
3132 * be set, but apps using ext_geometry_shader4 quite often
3133 * were forgetting so we're using MAX_VERTEX_VARYING from
3134 * that spec even though we could debug_assert if it's not
3135 * set, but that's a lot uglier. */
3136 uint max_output_vertices = 32;
3137 uint i = 0;
3138 /* inputs are always indirect with gs */
3139 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
3140 bld.gs_iface = gs_iface;
3141 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
3142 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
3143 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
3144
3145 for (i = 0; i < info->num_properties; ++i) {
3146 if (info->properties[i].name ==
3147 TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
3148 max_output_vertices = info->properties[i].data[0];
3149 }
3150 }
3151 bld.max_output_vertices_vec =
3152 lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
3153 max_output_vertices);
3154 }
3155
3156 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3157
3158 bld.system_values = *system_values;
3159
3160 lp_build_tgsi_llvm(&bld.bld_base, tokens);
3161
3162 if (0) {
3163 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
3164 LLVMValueRef function = LLVMGetBasicBlockParent(block);
3165 debug_printf("11111111111111111111111111111 \n");
3166 tgsi_dump(tokens, 0);
3167 lp_debug_dump_value(function);
3168 debug_printf("2222222222222222222222222222 \n");
3169 }
3170
3171 if (0) {
3172 LLVMModuleRef module = LLVMGetGlobalParent(
3173 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
3174 LLVMDumpModule(module);
3175
3176 }
3177 }