75f6defe1011c2c32ffb66d2d0a432d94cc4b636
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_tgsi_action.h"
51 #include "lp_bld_type.h"
52 #include "lp_bld_const.h"
53 #include "lp_bld_arit.h"
54 #include "lp_bld_bitarit.h"
55 #include "lp_bld_gather.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_logic.h"
58 #include "lp_bld_swizzle.h"
59 #include "lp_bld_flow.h"
60 #include "lp_bld_quad.h"
61 #include "lp_bld_tgsi.h"
62 #include "lp_bld_limits.h"
63 #include "lp_bld_debug.h"
64 #include "lp_bld_printf.h"
65 #include "lp_bld_sample.h"
66 #include "lp_bld_struct.h"
67
68 #define DUMP_GS_EMITS 0
69
/**
 * Initialize execution-mask state for one shader translation.
 *
 * All partial masks (cond/loop/switch/call-return) start fully enabled
 * (all ones) and every stack is empty.  A scalar loop-iteration counter
 * is allocated and seeded with LP_MAX_TGSI_LOOP_ITERATIONS so that
 * runaway loops are guaranteed to terminate.
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMBuilderRef builder = bld->gallivm->builder;

   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   mask->cond_stack_size = 0;
   mask->loop_stack_size = 0;
   mask->call_stack_size = 0;
   mask->switch_stack_size = 0;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   /* All lanes enabled initially. */
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   /* Memory slot for the loop limiter so it survives loop back-edges. */
   mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");

   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      mask->loop_limiter);
}
95
/**
 * Recompute exec_mask as the AND of all currently relevant partial
 * masks (cond, loop cont/break, switch, call-return) and refresh
 * has_mask, which tells stores whether masking is needed at all.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (mask->switch_stack_size) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (mask->call_stack_size || mask->ret_in_main) {
      /* ret_in_main covers RET outside any subroutine (see lp_exec_mask_ret). */
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0 ||
                     mask->switch_stack_size > 0 ||
                     mask->ret_in_main);
}
135
/**
 * Enter an IF: push the current condition mask and AND the new
 * condition vector 'val' into it, then recompute the full exec mask.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      /* Outermost IF: cond_mask must still be the all-ones constant. */
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}
153
/**
 * Handle ELSE: invert the current condition mask relative to the mask
 * that was active when the matching IF was entered.
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   /* Lanes enabled in the else branch = enclosing mask & ~if mask. */
   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
173
174 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
175 {
176 assert(mask->cond_stack_size);
177 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
178 lp_exec_mask_update(mask);
179 }
180
/**
 * Enter a TGSI BGNLOOP: push the enclosing loop's state, allocate a
 * fresh memory-backed break mask, open a new basic block for the loop
 * body and branch into it.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size == 0) {
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* BRK now means "break out of loop" until the next SWITCH/ENDLOOP. */
   mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
      mask->break_type;
   mask->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break_mask lives in memory so it carries across the loop back-edge. */
   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(builder, mask->loop_block);

   /* Reload at the loop head: ENDLOOP stores the updated mask here. */
   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
216
/**
 * Execute BRK: depending on break_type, disable the active lanes in
 * either the loop break mask or the switch mask.  bld_base is used to
 * peek at the next instruction so an unconditional break at the end of
 * a case can be handled specially.
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* Lanes currently executing leave the loop: clear them in break_mask. */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* Breaking out of a switch.  The break is unconditional (applies to
       * all lanes of the case) if the next opcode ends this case. */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (mask->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && mask->switch_pc) {
            bld_base->pc = mask->switch_pc;
            return;
         }
      }

      if (break_always) {
         /* Whole case breaks: switch mask becomes all zero. */
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}
265
/**
 * Execute BREAKC: lanes that are both currently active and have 'cond'
 * set leave the loop (or switch, per break_type).
 */
static void lp_exec_break_condition(struct lp_exec_mask *mask,
                                    LLVMValueRef cond)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef cond_mask = LLVMBuildAnd(builder,
                                         mask->exec_mask,
                                         cond, "cond_mask");
   /* Invert: bits remain set only for lanes that do NOT break. */
   cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");

   if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      cond_mask, "breakc_full");
   }
   else {
      mask->switch_mask = LLVMBuildAnd(builder,
                                       mask->switch_mask,
                                       cond_mask, "breakc_switch");
   }

   lp_exec_mask_update(mask);
}
288
289 static void lp_exec_continue(struct lp_exec_mask *mask)
290 {
291 LLVMBuilderRef builder = mask->bld->gallivm->builder;
292 LLVMValueRef exec_mask = LLVMBuildNot(builder,
293 mask->exec_mask,
294 "");
295
296 mask->cont_mask = LLVMBuildAnd(builder,
297 mask->cont_mask,
298 exec_mask, "");
299
300 lp_exec_mask_update(mask);
301 }
302
303
/**
 * Close a TGSI loop (ENDLOOP): emit the back-edge branch, which is taken
 * while any lane is still live AND the iteration limiter has not run
 * out, then pop the saved loop state.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* Single wide integer covering the whole mask vector, for a cheap
    * "any lane set" test via one compare. */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, mask->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   /* Back-edge to the loop head, or fall through to endloop. */
   LLVMBuildCondBr(builder,
                   icond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   /* Pop the enclosing loop's state saved by lp_exec_bgnloop(). */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
   mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];

   lp_exec_mask_update(mask);
}
375
/**
 * Enter a TGSI SWITCH: push the enclosing switch state and start with
 * an all-zero switch mask (no lanes run until a CASE/DEFAULT matches).
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   /* BRK now means "break out of switch". */
   mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
      mask->break_type;
   mask->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   mask->switch_stack[mask->switch_stack_size].switch_val = mask->switch_val;
   mask->switch_stack[mask->switch_stack_size].switch_mask = mask->switch_mask;
   mask->switch_stack[mask->switch_stack_size].switch_mask_default = mask->switch_mask_default;
   mask->switch_stack[mask->switch_stack_size].switch_in_default = mask->switch_in_default;
   mask->switch_stack[mask->switch_stack_size].switch_pc = mask->switch_pc;
   mask->switch_stack_size++;

   mask->switch_val = switchval;
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   /* Accumulates lanes matched by any CASE; DEFAULT runs its complement. */
   mask->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   mask->switch_in_default = false;
   mask->switch_pc = 0;

   lp_exec_mask_update(mask);
}
398
/**
 * Close a TGSI SWITCH (ENDSWITCH).  If a DEFAULT statement was deferred
 * (recorded in switch_pc by lp_exec_default), jump back and execute it
 * now with the complement of all matched-case lanes; otherwise pop the
 * saved switch state.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   /* check if there's deferred default if so do it now */
   if (mask->switch_pc && !mask->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
      /* Default lanes = enclosing mask & ~(lanes matched by any case). */
      defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      mask->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[mask->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      /* Rewind the interpreter pc to the deferred default's body. */
      tmp_pc = bld_base->pc;
      bld_base->pc = mask->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      mask->switch_pc = tmp_pc - 1;

      return;
   }

   else if (mask->switch_pc && mask->switch_in_default) {
      /* Returning from the deferred default executed above. */
      assert(bld_base->pc == mask->switch_pc + 1);
   }

   mask->switch_stack_size--;
   mask->switch_val = mask->switch_stack[mask->switch_stack_size].switch_val;
   mask->switch_mask = mask->switch_stack[mask->switch_stack_size].switch_mask;
   mask->switch_mask_default = mask->switch_stack[mask->switch_stack_size].switch_mask_default;
   mask->switch_in_default = mask->switch_stack[mask->switch_stack_size].switch_in_default;
   mask->switch_pc = mask->switch_stack[mask->switch_stack_size].switch_pc;

   mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];

   lp_exec_mask_update(mask);
}
444
/**
 * Execute CASE: enable lanes whose switch value equals 'caseval'
 * (keeping lanes that fell through from the previous case) and record
 * them in switch_mask_default so DEFAULT can run the complement.
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   LLVMValueRef casemask, prevmask;

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!mask->switch_in_default) {
      prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, mask->switch_val);
      mask->switch_mask_default = LLVMBuildOr(builder, casemask,
                                              mask->switch_mask_default, "sw_default_mask");
      /* OR with current mask: lanes falling through stay enabled. */
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
464
/*
 * Analyse default statement in a switch.
 * Scans forward from the current pc, skipping nested switches, to find
 * out whether this DEFAULT (plus any CASE labels attached to it) is the
 * last statement of the current switch.
 * \return true if default is last statement, false otherwise
 * \param default_pc_start contains pc of instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   unsigned curr_switch_stack = mask->switch_stack_size;

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   /* NOTE(review): pc is unsigned, so "pc != -1" compares against
    * UINT_MAX — well-defined but worth confirming against callers that
    * set pc to -1. */
   while (pc != -1 && pc < bld_base->num_instructions) {
      unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == mask->switch_stack_size) {
            /* Another case at this nesting level follows: not last. */
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == mask->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}
510
511 static void lp_exec_default(struct lp_exec_mask *mask,
512 struct lp_build_tgsi_context * bld_base)
513 {
514 LLVMBuilderRef builder = mask->bld->gallivm->builder;
515
516 int default_exec_pc;
517 boolean default_is_last;
518
519 /*
520 * This is a messy opcode, because it may not be always at the end and
521 * there can be fallthrough in and out of it.
522 */
523
524 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
525 /*
526 * If it is last statement in switch (note that case statements appearing
527 * "at the same time" as default don't change that) everything is just fine,
528 * update switch mask and go on. This means we can handle default with
529 * fallthrough INTO it without overhead, if it is last.
530 */
531 if (default_is_last) {
532 LLVMValueRef prevmask, defaultmask;
533 prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
534 defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
535 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
536 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
537 mask->switch_in_default = true;
538
539 lp_exec_mask_update(mask);
540 }
541 else {
542 /*
543 * Technically, "case" immediately before default isn't really a
544 * fallthrough, however we still have to count them as such as we
545 * already have updated the masks.
546 * If that happens in practice could add a switch optimizer pass
547 * which just gets rid of all case statements appearing together with
548 * default (or could do switch analysis at switch start time instead).
549 */
550 unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
551 boolean ft_into = (opcode != TGSI_OPCODE_BRK ||
552 opcode != TGSI_OPCODE_SWITCH);
553 /*
554 * If it is not last statement and there was no fallthrough into it,
555 * we record the PC and continue execution at next case (again, those
556 * case encountered at the same time don't count). At endswitch
557 * time, we update switchmask, and go back executing the code we skipped
558 * until the next break (possibly re-executing some code with changed mask
559 * if there was a fallthrough out of default).
560 * Finally, if it is not last statement and there was a fallthrough into it,
561 * do the same as with the former case, except instead of skipping the code
562 * just execute it without updating the mask, then go back and re-execute.
563 */
564 mask->switch_pc = bld_base->pc;
565 if (!ft_into) {
566 bld_base->pc = default_exec_pc;
567 }
568 }
569 }
570
571
/* stores val into an address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 * An optional per-lane predicate 'pred' is ANDed with the exec mask;
 * if neither applies the store is unconditional.
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef res, dst;

      /* Read-modify-write: keep old lanes where pred is 0. */
      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, pred, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      LLVMBuildStore(builder, val, dst_ptr);
}
607
608 static void lp_exec_mask_call(struct lp_exec_mask *mask,
609 int func,
610 int *pc)
611 {
612 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
613 mask->call_stack[mask->call_stack_size].pc = *pc;
614 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
615 mask->call_stack_size++;
616 *pc = func;
617 }
618
/**
 * Execute RET.  An unmasked return from main terminates translation
 * (*pc = -1).  Otherwise, disable the currently active lanes in the
 * return mask so they skip the rest of the (sub)routine.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask;

   if (mask->cond_stack_size == 0 &&
       mask->loop_stack_size == 0 &&
       mask->switch_stack_size == 0 &&
       mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->call_stack_size == 0) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
652
/* BGNSUB requires no mask bookkeeping; the work happens in
 * lp_exec_mask_call() / lp_exec_mask_endsub(). */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
656
657 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
658 {
659 assert(mask->call_stack_size);
660 mask->call_stack_size--;
661 *pc = mask->call_stack[mask->call_stack_size].pc;
662 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
663 lp_exec_mask_update(mask);
664 }
665
666
/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index which temporary register
 * \param chan which channel of the temp register.
 * When the shader uses indirect temp addressing, temps live in one flat
 * alloca array (4 channels per register); otherwise each channel has
 * its own alloca.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
   }
   else {
      return bld->temps[index][chan];
   }
}
688
/**
 * Return pointer to a output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index which output register
 * \param chan which channel of the output register.
 * Mirrors lp_get_temp_ptr_soa(): flat array when outputs are indirectly
 * addressed, per-channel allocas otherwise.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
                                                 index * 4 + chan);
      return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
   }
   else {
      return bld->outputs[index][chan];
   }
}
711
/*
 * If we have indirect addressing in outputs copy our alloca array
 * to the outputs slots specified by the caller to make sure
 * our outputs are delivered consistently via the same interface.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
      unsigned index, chan;
      assert(bld->bld_base.info->num_outputs <=
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            /* Point each output slot at its element of the flat array. */
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}
731
/**
 * Gather vector.
 * Builds a result vector by loading base_ptr[indexes[i]] for each lane
 * i, one scalar load + insertelement at a time.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef res = bld->undef;
   unsigned i;

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                             &index, 1, "gather_ptr");
      LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
   }

   return res;
}
762
763
/**
 * Scatter/store vector.
 * Stores each lane of 'values' to base_ptr[indexes[i]], masked per lane
 * by the combination of 'pred' and the current execution mask (via a
 * scalar load/select/store when the lane may be disabled).
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      /* Debug aid, disabled by default. */
      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* Masked lane: read-modify-write keeps the old value. */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
815
816
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 * The result is clamped to the register file's file_max to avoid
 * out-of-bounds access.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);

   max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                      uint_bld->type,
                                      bld->bld_base.info->file_max[reg_file]);

   /* Clamp (unsigned min) so the gather cannot read past the file. */
   assert(!uint_bld->type.sign);
   index = lp_build_min(uint_bld, index, max_index);

   return index;
}
872
873 static struct lp_build_context *
874 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
875 enum tgsi_opcode_type stype)
876 {
877 struct lp_build_context *bld_fetch;
878
879 switch (stype) {
880 case TGSI_TYPE_FLOAT:
881 case TGSI_TYPE_UNTYPED:
882 bld_fetch = &bld_base->base;
883 break;
884 case TGSI_TYPE_UNSIGNED:
885 bld_fetch = &bld_base->uint_bld;
886 break;
887 case TGSI_TYPE_SIGNED:
888 bld_fetch = &bld_base->int_bld;
889 break;
890 case TGSI_TYPE_VOID:
891 case TGSI_TYPE_DOUBLE:
892 default:
893 assert(0);
894 bld_fetch = NULL;
895 break;
896 }
897 return bld_fetch;
898 }
899
/**
 * Fetch one channel of a constant-buffer operand as a SoA vector.
 * Direct accesses broadcast a single scalar load; indirect accesses
 * gather per-lane from the buffer.  The result is bitcast to the
 * int/uint vector type when stype requires it.
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef indirect_index = NULL;
   unsigned dimension = 0;
   LLVMValueRef dimension_index;
   LLVMValueRef consts_ptr;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0);

   if (reg->Register.Dimension) {
      /* 2D constant reference: Dimension selects the constant buffer. */
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   dimension_index = lp_build_const_int32(gallivm, dimension);
   consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   }

   if (reg->Register.Indirect) {
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      /* Gather values from the constant buffer */
      res = build_gather(&bld_base->base, consts_ptr, index_vec);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;

      index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);

      /* One scalar load, replicated to all lanes. */
      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(&bld_base->base, scalar);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}
966
967 static LLVMValueRef
968 emit_fetch_immediate(
969 struct lp_build_tgsi_context * bld_base,
970 const struct tgsi_full_src_register * reg,
971 enum tgsi_opcode_type stype,
972 unsigned swizzle)
973 {
974 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
975 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
976 LLVMBuilderRef builder = gallivm->builder;
977 struct lp_build_context *uint_bld = &bld_base->uint_bld;
978 struct lp_build_context *float_bld = &bld_base->base;
979 LLVMValueRef res = NULL;
980 LLVMValueRef indirect_index = NULL;
981
982 if (reg->Register.Indirect) {
983 indirect_index = get_indirect_index(bld,
984 reg->Register.File,
985 reg->Register.Index,
986 &reg->Indirect);
987 }
988
989 if (reg->Register.Indirect) {
990 LLVMValueRef swizzle_vec =
991 lp_build_const_int_vec(bld->bld_base.base.gallivm,
992 uint_bld->type, swizzle);
993 LLVMValueRef length_vec =
994 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
995 bld->bld_base.base.type.length);
996 LLVMValueRef index_vec; /* index into the immediate register array */
997 LLVMValueRef imms_array;
998 LLVMValueRef pixel_offsets;
999 LLVMValueRef offsets[LP_MAX_VECTOR_LENGTH];
1000 LLVMTypeRef float_ptr_type;
1001 int i;
1002
1003 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1004 for (i = 0; i < float_bld->type.length; i++) {
1005 offsets[i] = lp_build_const_int32(gallivm, i);
1006 }
1007 pixel_offsets = LLVMConstVector(offsets, float_bld->type.length);
1008
1009 /* index_vec = (indirect_index * 4 + swizzle) * length */
1010 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1011 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1012 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1013 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1014
1015 /* cast imms_array pointer to float* */
1016 float_ptr_type = LLVMPointerType(
1017 LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
1018 imms_array = LLVMBuildBitCast(builder, bld->imms_array,
1019 float_ptr_type, "");
1020
1021 /* Gather values from the immediate register array */
1022 res = build_gather(&bld_base->base, imms_array, index_vec);
1023 }
1024 else {
1025 res = bld->immediates[reg->Register.Index][swizzle];
1026 }
1027
1028 if (stype == TGSI_TYPE_UNSIGNED) {
1029 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1030 } else if (stype == TGSI_TYPE_SIGNED) {
1031 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1032 }
1033 return res;
1034 }
1035
1036 static LLVMValueRef
1037 emit_fetch_input(
1038 struct lp_build_tgsi_context * bld_base,
1039 const struct tgsi_full_src_register * reg,
1040 enum tgsi_opcode_type stype,
1041 unsigned swizzle)
1042 {
1043 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1044 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1045 LLVMBuilderRef builder = gallivm->builder;
1046 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1047 struct lp_build_context *float_bld = &bld_base->base;
1048 LLVMValueRef indirect_index = NULL;
1049 LLVMValueRef res;
1050
1051 if (reg->Register.Indirect) {
1052 indirect_index = get_indirect_index(bld,
1053 reg->Register.File,
1054 reg->Register.Index,
1055 &reg->Indirect);
1056 }
1057
1058 if (reg->Register.Indirect) {
1059 LLVMValueRef swizzle_vec =
1060 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1061 LLVMValueRef length_vec =
1062 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
1063 LLVMValueRef index_vec; /* index into the input reg array */
1064 LLVMValueRef inputs_array;
1065 LLVMValueRef pixel_offsets;
1066 LLVMValueRef offsets[LP_MAX_VECTOR_LENGTH];
1067 LLVMTypeRef float_ptr_type;
1068 int i;
1069
1070 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1071 for (i = 0; i < float_bld->type.length; i++) {
1072 offsets[i] = lp_build_const_int32(gallivm, i);
1073 }
1074 pixel_offsets = LLVMConstVector(offsets, float_bld->type.length);
1075
1076 /* index_vec = (indirect_index * 4 + swizzle) * length */
1077 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1078 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1079 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1080 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1081
1082 /* cast inputs_array pointer to float* */
1083 float_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1084 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
1085 float_ptr_type, "");
1086
1087 /* Gather values from the input register array */
1088 res = build_gather(&bld_base->base, inputs_array, index_vec);
1089 } else {
1090 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1091 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1092 reg->Register.Index * 4 + swizzle);
1093 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1094 bld->inputs_array, &lindex, 1, "");
1095 res = LLVMBuildLoad(builder, input_ptr, "");
1096 }
1097 else {
1098 res = bld->inputs[reg->Register.Index][swizzle];
1099 }
1100 }
1101
1102 assert(res);
1103
1104 if (stype == TGSI_TYPE_UNSIGNED) {
1105 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1106 } else if (stype == TGSI_TYPE_SIGNED) {
1107 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1108 }
1109
1110 return res;
1111 }
1112
1113
1114 static LLVMValueRef
1115 emit_fetch_gs_input(
1116 struct lp_build_tgsi_context * bld_base,
1117 const struct tgsi_full_src_register * reg,
1118 enum tgsi_opcode_type stype,
1119 unsigned swizzle)
1120 {
1121 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1122 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1123 LLVMBuilderRef builder = gallivm->builder;
1124 LLVMValueRef attrib_index = NULL;
1125 LLVMValueRef vertex_index = NULL;
1126 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1127 LLVMValueRef res;
1128
1129 if (reg->Register.Indirect) {
1130 attrib_index = get_indirect_index(bld,
1131 reg->Register.File,
1132 reg->Register.Index,
1133 &reg->Indirect);
1134 } else {
1135 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1136 }
1137
1138 if (reg->Dimension.Indirect) {
1139 vertex_index = get_indirect_index(bld,
1140 reg->Register.File,
1141 reg->Dimension.Index,
1142 &reg->DimIndirect);
1143 } else {
1144 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1145 }
1146
1147 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1148 reg->Dimension.Indirect,
1149 vertex_index,
1150 reg->Register.Indirect,
1151 attrib_index,
1152 swizzle_index);
1153
1154 assert(res);
1155
1156 if (stype == TGSI_TYPE_UNSIGNED) {
1157 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1158 } else if (stype == TGSI_TYPE_SIGNED) {
1159 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1160 }
1161
1162 return res;
1163 }
1164
1165 static LLVMValueRef
1166 emit_fetch_temporary(
1167 struct lp_build_tgsi_context * bld_base,
1168 const struct tgsi_full_src_register * reg,
1169 enum tgsi_opcode_type stype,
1170 unsigned swizzle)
1171 {
1172 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1173 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1174 LLVMBuilderRef builder = gallivm->builder;
1175 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1176 struct lp_build_context *float_bld = &bld_base->base;
1177 LLVMValueRef indirect_index = NULL;
1178 LLVMValueRef res;
1179
1180 if (reg->Register.Indirect) {
1181 indirect_index = get_indirect_index(bld,
1182 reg->Register.File,
1183 reg->Register.Index,
1184 &reg->Indirect);
1185 }
1186
1187 if (reg->Register.Indirect) {
1188 LLVMValueRef swizzle_vec =
1189 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
1190 LLVMValueRef length_vec =
1191 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
1192 bld->bld_base.base.type.length);
1193 LLVMValueRef index_vec; /* index into the temp reg array */
1194 LLVMValueRef temps_array;
1195 LLVMValueRef pixel_offsets;
1196 LLVMValueRef offsets[LP_MAX_VECTOR_LENGTH];
1197 LLVMTypeRef float_ptr_type;
1198 int i;
1199
1200 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1201 for (i = 0; i < float_bld->type.length; i++) {
1202 offsets[i] = lp_build_const_int32(gallivm, i);
1203 }
1204 pixel_offsets = LLVMConstVector(offsets, float_bld->type.length);
1205
1206 /* index_vec = (indirect_index * 4 + swizzle) * length */
1207 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1208 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1209 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1210 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1211
1212 /* cast temps_array pointer to float* */
1213 float_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
1214 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1215 float_ptr_type, "");
1216
1217 /* Gather values from the temporary register array */
1218 res = build_gather(&bld_base->base, temps_array, index_vec);
1219 }
1220 else {
1221 LLVMValueRef temp_ptr;
1222 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1223 res = LLVMBuildLoad(builder, temp_ptr, "");
1224 }
1225
1226 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
1227 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1228 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1229 }
1230
1231 return res;
1232 }
1233
1234 static LLVMValueRef
1235 emit_fetch_system_value(
1236 struct lp_build_tgsi_context * bld_base,
1237 const struct tgsi_full_src_register * reg,
1238 enum tgsi_opcode_type stype,
1239 unsigned swizzle)
1240 {
1241 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1242 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1243 const struct tgsi_shader_info *info = bld->bld_base.info;
1244 LLVMBuilderRef builder = gallivm->builder;
1245 LLVMValueRef res;
1246 enum tgsi_opcode_type atype; // Actual type of the value
1247
1248 assert(!reg->Register.Indirect);
1249
1250 switch (info->system_value_semantic_name[reg->Register.Index]) {
1251 case TGSI_SEMANTIC_INSTANCEID:
1252 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1253 atype = TGSI_TYPE_UNSIGNED;
1254 break;
1255
1256 case TGSI_SEMANTIC_VERTEXID:
1257 res = bld->system_values.vertex_id;
1258 atype = TGSI_TYPE_UNSIGNED;
1259 break;
1260
1261 case TGSI_SEMANTIC_PRIMID:
1262 res = bld->system_values.prim_id;
1263 atype = TGSI_TYPE_UNSIGNED;
1264 break;
1265
1266 default:
1267 assert(!"unexpected semantic in emit_fetch_system_value");
1268 res = bld_base->base.zero;
1269 atype = TGSI_TYPE_FLOAT;
1270 break;
1271 }
1272
1273 if (atype != stype) {
1274 if (stype == TGSI_TYPE_FLOAT) {
1275 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1276 } else if (stype == TGSI_TYPE_UNSIGNED) {
1277 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1278 } else if (stype == TGSI_TYPE_SIGNED) {
1279 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1280 }
1281 }
1282
1283 return res;
1284 }
1285
1286 /**
1287 * Register fetch with derivatives.
1288 */
1289 static void
1290 emit_fetch_deriv(
1291 struct lp_build_tgsi_soa_context *bld,
1292 LLVMValueRef src,
1293 LLVMValueRef *res,
1294 LLVMValueRef *ddx,
1295 LLVMValueRef *ddy)
1296 {
1297 if(res)
1298 *res = src;
1299
1300 /* TODO: use interpolation coeffs for inputs */
1301
1302 if(ddx)
1303 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1304
1305 if(ddy)
1306 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1307 }
1308
1309
1310 /**
1311 * Predicate.
1312 */
1313 static void
1314 emit_fetch_predicate(
1315 struct lp_build_tgsi_soa_context *bld,
1316 const struct tgsi_full_instruction *inst,
1317 LLVMValueRef *pred)
1318 {
1319 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1320 unsigned index;
1321 unsigned char swizzles[4];
1322 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
1323 LLVMValueRef value;
1324 unsigned chan;
1325
1326 if (!inst->Instruction.Predicate) {
1327 TGSI_FOR_EACH_CHANNEL( chan ) {
1328 pred[chan] = NULL;
1329 }
1330 return;
1331 }
1332
1333 swizzles[0] = inst->Predicate.SwizzleX;
1334 swizzles[1] = inst->Predicate.SwizzleY;
1335 swizzles[2] = inst->Predicate.SwizzleZ;
1336 swizzles[3] = inst->Predicate.SwizzleW;
1337
1338 index = inst->Predicate.Index;
1339 assert(index < LP_MAX_TGSI_PREDS);
1340
1341 TGSI_FOR_EACH_CHANNEL( chan ) {
1342 unsigned swizzle = swizzles[chan];
1343
1344 /*
1345 * Only fetch the predicate register channels that are actually listed
1346 * in the swizzles
1347 */
1348 if (!unswizzled[swizzle]) {
1349 value = LLVMBuildLoad(builder,
1350 bld->preds[index][swizzle], "");
1351
1352 /*
1353 * Convert the value to an integer mask.
1354 *
1355 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
1356 * is needlessly causing two comparisons due to storing the intermediate
1357 * result as float vector instead of an integer mask vector.
1358 */
1359 value = lp_build_compare(bld->bld_base.base.gallivm,
1360 bld->bld_base.base.type,
1361 PIPE_FUNC_NOTEQUAL,
1362 value,
1363 bld->bld_base.base.zero);
1364 if (inst->Predicate.Negate) {
1365 value = LLVMBuildNot(builder, value, "");
1366 }
1367
1368 unswizzled[swizzle] = value;
1369 } else {
1370 value = unswizzled[swizzle];
1371 }
1372
1373 pred[chan] = value;
1374 }
1375 }
1376
1377 /**
1378 * Register store.
1379 */
1380 static void
1381 emit_store_chan(
1382 struct lp_build_tgsi_context *bld_base,
1383 const struct tgsi_full_instruction *inst,
1384 unsigned index,
1385 unsigned chan_index,
1386 LLVMValueRef pred,
1387 LLVMValueRef value)
1388 {
1389 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1390 struct gallivm_state *gallivm = bld_base->base.gallivm;
1391 LLVMBuilderRef builder = gallivm->builder;
1392 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1393 struct lp_build_context *float_bld = &bld_base->base;
1394 struct lp_build_context *int_bld = &bld_base->int_bld;
1395 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1396 LLVMValueRef indirect_index = NULL;
1397 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1398
1399 /*
1400 * Apply saturation.
1401 *
1402 * It is always assumed to be float.
1403 */
1404 switch( inst->Instruction.Saturate ) {
1405 case TGSI_SAT_NONE:
1406 break;
1407
1408 case TGSI_SAT_ZERO_ONE:
1409 assert(dtype == TGSI_TYPE_FLOAT ||
1410 dtype == TGSI_TYPE_UNTYPED);
1411 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1412 value = lp_build_max_ext(float_bld, value, float_bld->zero,
1413 GALLIVM_NAN_RETURN_SECOND);
1414 value = lp_build_min_ext(float_bld, value, float_bld->one,
1415 GALLIVM_NAN_BEHAVIOR_UNDEFINED);
1416 break;
1417
1418 case TGSI_SAT_MINUS_PLUS_ONE:
1419 assert(dtype == TGSI_TYPE_FLOAT ||
1420 dtype == TGSI_TYPE_UNTYPED);
1421 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1422 value = lp_build_max_ext(float_bld, value,
1423 lp_build_const_vec(gallivm, float_bld->type, -1.0),
1424 GALLIVM_NAN_RETURN_SECOND);
1425 value = lp_build_min_ext(float_bld, value, float_bld->one,
1426 GALLIVM_NAN_BEHAVIOR_UNDEFINED);
1427 break;
1428
1429 default:
1430 assert(0);
1431 }
1432
1433 if (reg->Register.Indirect) {
1434 indirect_index = get_indirect_index(bld,
1435 reg->Register.File,
1436 reg->Register.Index,
1437 &reg->Indirect);
1438 } else {
1439 assert(reg->Register.Index <=
1440 bld_base->info->file_max[reg->Register.File]);
1441 }
1442
1443 switch( reg->Register.File ) {
1444 case TGSI_FILE_OUTPUT:
1445 /* Outputs are always stored as floats */
1446 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1447
1448 if (reg->Register.Indirect) {
1449 LLVMValueRef chan_vec =
1450 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1451 LLVMValueRef length_vec =
1452 lp_build_const_int_vec(gallivm, uint_bld->type, float_bld->type.length);
1453 LLVMValueRef index_vec; /* indexes into the temp registers */
1454 LLVMValueRef outputs_array;
1455 LLVMValueRef pixel_offsets;
1456 LLVMTypeRef float_ptr_type;
1457 int i;
1458
1459 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1460 pixel_offsets = uint_bld->undef;
1461 for (i = 0; i < float_bld->type.length; i++) {
1462 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1463 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1464 ii, ii, "");
1465 }
1466
1467 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1468 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1469 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1470 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1471 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1472
1473 float_ptr_type =
1474 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1475 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
1476 float_ptr_type, "");
1477
1478 /* Scatter store values into temp registers */
1479 emit_mask_scatter(bld, outputs_array, index_vec, value,
1480 &bld->exec_mask, pred);
1481 }
1482 else {
1483 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1484 chan_index);
1485 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
1486 }
1487 break;
1488
1489 case TGSI_FILE_TEMPORARY:
1490 /* Temporaries are always stored as floats */
1491 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1492
1493 if (reg->Register.Indirect) {
1494 LLVMValueRef chan_vec =
1495 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1496 LLVMValueRef length_vec =
1497 lp_build_const_int_vec(gallivm, uint_bld->type,
1498 float_bld->type.length);
1499 LLVMValueRef index_vec; /* indexes into the temp registers */
1500 LLVMValueRef temps_array;
1501 LLVMValueRef pixel_offsets;
1502 LLVMTypeRef float_ptr_type;
1503 int i;
1504
1505 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1506 pixel_offsets = uint_bld->undef;
1507 for (i = 0; i < float_bld->type.length; i++) {
1508 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1509 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1510 ii, ii, "");
1511 }
1512
1513 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1514 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1515 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1516 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1517 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1518
1519 float_ptr_type =
1520 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1521 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1522 float_ptr_type, "");
1523
1524 /* Scatter store values into temp registers */
1525 emit_mask_scatter(bld, temps_array, index_vec, value,
1526 &bld->exec_mask, pred);
1527 }
1528 else {
1529 LLVMValueRef temp_ptr;
1530 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1531 chan_index);
1532 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
1533 }
1534 break;
1535
1536 case TGSI_FILE_ADDRESS:
1537 assert(dtype == TGSI_TYPE_SIGNED);
1538 assert(LLVMTypeOf(value) == int_bld->vec_type);
1539 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1540 lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
1541 bld->addr[reg->Register.Index][chan_index]);
1542 break;
1543
1544 case TGSI_FILE_PREDICATE:
1545 assert(LLVMTypeOf(value) == float_bld->vec_type);
1546 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1547 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
1548 bld->preds[reg->Register.Index][chan_index]);
1549 break;
1550
1551 default:
1552 assert( 0 );
1553 }
1554
1555 (void)dtype;
1556 }
1557
1558 static void
1559 emit_store(
1560 struct lp_build_tgsi_context * bld_base,
1561 const struct tgsi_full_instruction * inst,
1562 const struct tgsi_opcode_info * info,
1563 LLVMValueRef dst[4])
1564
1565 {
1566 unsigned chan_index;
1567 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1568
1569 if(info->num_dst) {
1570 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1571
1572 emit_fetch_predicate( bld, inst, pred );
1573
1574 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1575 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1576 }
1577 }
1578 }
1579
1580 static unsigned
1581 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1582 {
1583 switch (tgsi_target) {
1584 case TGSI_TEXTURE_BUFFER:
1585 return PIPE_BUFFER;
1586 case TGSI_TEXTURE_1D:
1587 case TGSI_TEXTURE_SHADOW1D:
1588 return PIPE_TEXTURE_1D;
1589 case TGSI_TEXTURE_2D:
1590 case TGSI_TEXTURE_SHADOW2D:
1591 case TGSI_TEXTURE_2D_MSAA:
1592 return PIPE_TEXTURE_2D;
1593 case TGSI_TEXTURE_3D:
1594 return PIPE_TEXTURE_3D;
1595 case TGSI_TEXTURE_CUBE:
1596 case TGSI_TEXTURE_SHADOWCUBE:
1597 return PIPE_TEXTURE_CUBE;
1598 case TGSI_TEXTURE_RECT:
1599 case TGSI_TEXTURE_SHADOWRECT:
1600 return PIPE_TEXTURE_RECT;
1601 case TGSI_TEXTURE_1D_ARRAY:
1602 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1603 return PIPE_TEXTURE_1D_ARRAY;
1604 case TGSI_TEXTURE_2D_ARRAY:
1605 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1606 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1607 return PIPE_TEXTURE_2D_ARRAY;
1608 case TGSI_TEXTURE_CUBE_ARRAY:
1609 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1610 return PIPE_TEXTURE_CUBE_ARRAY;
1611 default:
1612 assert(0);
1613 return PIPE_BUFFER;
1614 }
1615 }
1616
1617
1618 static enum lp_sampler_lod_property
1619 lp_build_lod_property(
1620 struct lp_build_tgsi_context *bld_base,
1621 const struct tgsi_full_instruction *inst,
1622 unsigned src_op)
1623 {
1624 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
1625 enum lp_sampler_lod_property lod_property;
1626
1627 /*
1628 * Not much we can do here. We could try catching inputs declared
1629 * with constant interpolation but not sure it's worth it - since for
1630 * TEX opcodes as well as FETCH/LD the lod comes from same reg as
1631 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
1632 * like the constant/immediate recognition below.
1633 * What seems to be of more value would be to recognize temps holding
1634 * broadcasted scalars but no way we can do it.
1635 * Tried asking llvm but without any success (using LLVMIsConstant
1636 * even though this isn't exactly what we'd need), even as simple as
1637 * IMM[0] UINT32 (0,-1,0,0)
1638 * MOV TEMP[0] IMM[0].yyyy
1639 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
1640 * doesn't work.
1641 * This means there's ZERO chance this will ever catch a scalar lod
1642 * with traditional tex opcodes as well as texel fetches, since the lod
1643 * comes from the same reg as coords (except some test shaders using
1644 * constant coords maybe).
1645 * There's at least hope for sample opcodes as well as size queries.
1646 */
1647 if (reg->Register.File == TGSI_FILE_CONSTANT ||
1648 reg->Register.File == TGSI_FILE_IMMEDIATE) {
1649 lod_property = LP_SAMPLER_LOD_SCALAR;
1650 }
1651 else if (bld_base->info->processor == TGSI_PROCESSOR_FRAGMENT) {
1652 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
1653 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1654 }
1655 else {
1656 lod_property = LP_SAMPLER_LOD_PER_QUAD;
1657 }
1658 }
1659 else {
1660 /* never use scalar (per-quad) lod the results are just too wrong. */
1661 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1662 }
1663 return lod_property;
1664 }
1665
1666
1667 /**
1668 * High-level instruction translators.
1669 */
1670
/**
 * Translate a traditional TGSI texture opcode (TEX/TXB/TXL/TXP/TXD family)
 * into a call to the sampler generator.
 *
 * Collects coordinates, projection divide, layer/shadow coords, offsets
 * and derivatives according to \p modifier and the texture target, then
 * hands everything to bld->sampler->emit_fetch_texel.
 *
 * \param texel  receives the 4 resulting channel vectors.
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;        /* 1/w for projected texturing */
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_derivatives *deriv_ptr = NULL;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_derivs, num_offsets, i;
   unsigned shadow_coord = 0;      /* source channel holding the shadow ref, 0 = none */
   unsigned layer_coord = 0;       /* source channel holding the array layer, 0 = none */

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * Determine per-target: how many coords carry derivatives/offsets, and
    * which source channels hold the layer and shadow reference values.
    */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      /* fallthrough */
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_SHADOW1D:
      shadow_coord = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      layer_coord = 2;
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      shadow_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   default:
      /* not handled by the traditional tex opcode path */
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      /* lod/bias sits in the .w channel of the coord register */
      LLVMValueRef lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         lod_bias = lod;
         explicit_lod = NULL;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         lod_bias = NULL;
         explicit_lod = lod;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      /* projected texturing: divide all coords by w */
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   }
   /* Shadow coord occupies always 5th slot. */
   if (shadow_coord) {
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      /* TXD: explicit derivatives come from src 1 and 2, sampler from src 3 */
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
      }
      deriv_ptr = &derivs;
      unit = inst->Src[3].Register.Index;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   } else {
      unit = inst->Src[1].Register.Index;
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  FALSE,
                                  unit, unit,
                                  coords,
                                  offsets,
                                  deriv_ptr,
                                  lod_bias, explicit_lod, lod_property,
                                  texel);
}
1847
1848 static void
1849 emit_sample(struct lp_build_tgsi_soa_context *bld,
1850 const struct tgsi_full_instruction *inst,
1851 enum lp_build_tex_modifier modifier,
1852 boolean compare,
1853 LLVMValueRef *texel)
1854 {
1855 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1856 unsigned texture_unit, sampler_unit;
1857 LLVMValueRef lod_bias, explicit_lod;
1858 LLVMValueRef coords[5];
1859 LLVMValueRef offsets[3] = { NULL };
1860 struct lp_derivatives derivs;
1861 struct lp_derivatives *deriv_ptr = NULL;
1862 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
1863
1864 unsigned num_offsets, num_derivs, i;
1865 unsigned layer_coord = 0;
1866
1867 if (!bld->sampler) {
1868 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1869 for (i = 0; i < 4; i++) {
1870 texel[i] = bld->bld_base.base.undef;
1871 }
1872 return;
1873 }
1874
1875 /*
1876 * unlike old-style tex opcodes the texture/sampler indices
1877 * always come from src1 and src2 respectively.
1878 */
1879 texture_unit = inst->Src[1].Register.Index;
1880 sampler_unit = inst->Src[2].Register.Index;
1881
1882 /*
1883 * Note inst->Texture.Texture will contain the number of offsets,
1884 * however the target information is NOT there and comes from the
1885 * declared sampler views instead.
1886 */
1887 switch (bld->sv[texture_unit].Resource) {
1888 case TGSI_TEXTURE_1D:
1889 num_offsets = 1;
1890 num_derivs = 1;
1891 break;
1892 case TGSI_TEXTURE_1D_ARRAY:
1893 layer_coord = 1;
1894 num_offsets = 1;
1895 num_derivs = 1;
1896 break;
1897 case TGSI_TEXTURE_2D:
1898 case TGSI_TEXTURE_RECT:
1899 num_offsets = 2;
1900 num_derivs = 2;
1901 break;
1902 case TGSI_TEXTURE_2D_ARRAY:
1903 layer_coord = 2;
1904 num_offsets = 2;
1905 num_derivs = 2;
1906 break;
1907 case TGSI_TEXTURE_CUBE:
1908 num_offsets = 2;
1909 num_derivs = 3;
1910 break;
1911 case TGSI_TEXTURE_3D:
1912 num_offsets = 3;
1913 num_derivs = 3;
1914 break;
1915 case TGSI_TEXTURE_CUBE_ARRAY:
1916 layer_coord = 3;
1917 num_offsets = 2;
1918 num_derivs = 3;
1919 break;
1920 default:
1921 assert(0);
1922 return;
1923 }
1924
1925 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
1926 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1927 LLVMValueRef lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
1928 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1929 lod_bias = lod;
1930 explicit_lod = NULL;
1931 }
1932 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1933 lod_bias = NULL;
1934 explicit_lod = lod;
1935 }
1936 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
1937 }
1938 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
1939 lod_bias = NULL;
1940 /* XXX might be better to explicitly pass the level zero information */
1941 explicit_lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
1942 }
1943 else {
1944 lod_bias = NULL;
1945 explicit_lod = NULL;
1946 }
1947
1948 for (i = 0; i < num_derivs; i++) {
1949 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
1950 }
1951 for (i = num_derivs; i < 5; i++) {
1952 coords[i] = bld->bld_base.base.undef;
1953 }
1954
1955 /* Layer coord always goes into 3rd slot, except for cube map arrays */
1956 if (layer_coord) {
1957 if (layer_coord == 3)
1958 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
1959 else
1960 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
1961 }
1962 /* Shadow coord occupies always 5th slot. */
1963 if (compare) {
1964 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
1965 }
1966
1967 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1968 unsigned dim;
1969 for (dim = 0; dim < num_derivs; ++dim) {
1970 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
1971 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
1972 }
1973 deriv_ptr = &derivs;
1974 /*
1975 * could also check all src regs if constant but I doubt such
1976 * cases exist in practice.
1977 */
1978 if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
1979 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
1980 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1981 }
1982 else {
1983 lod_property = LP_SAMPLER_LOD_PER_QUAD;
1984 }
1985 }
1986 else {
1987 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1988 }
1989 }
1990
1991 /* some advanced gather instructions (txgo) would require 4 offsets */
1992 if (inst->Texture.NumOffsets == 1) {
1993 unsigned dim;
1994 for (dim = 0; dim < num_offsets; dim++) {
1995 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
1996 }
1997 }
1998
1999 bld->sampler->emit_fetch_texel(bld->sampler,
2000 bld->bld_base.base.gallivm,
2001 bld->bld_base.base.type,
2002 FALSE,
2003 texture_unit, sampler_unit,
2004 coords,
2005 offsets,
2006 deriv_ptr,
2007 lod_bias, explicit_lod, lod_property,
2008 texel);
2009
2010 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
2011 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
2012 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
2013 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA) {
2014 unsigned char swizzles[4];
2015 swizzles[0] = inst->Src[1].Register.SwizzleX;
2016 swizzles[1] = inst->Src[1].Register.SwizzleY;
2017 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2018 swizzles[3] = inst->Src[1].Register.SwizzleW;
2019
2020 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2021 }
2022 }
2023
/**
 * Emit a texel fetch (TXF or SAMPLE_I): read one texel using integer
 * coordinates and an explicit integer lod, with no filtering.
 *
 * \param texel       receives the four channels of the fetched texel
 * \param is_samplei  TRUE for SAMPLE_I (texture target taken from the
 *                    declared sampler view, view swizzle applied),
 *                    FALSE for plain TXF (target encoded in the
 *                    instruction token, no swizzle)
 */
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef offsets[3] = { NULL };
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;

   /* Without a sampler generator nothing can be fetched; fill the
    * result with undefs so callers still get well-formed IR.
    */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   /* Determine the coordinate dimensionality and which source channel
    * (if any) carries the array layer index.
    */
   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers ? */
   if (target != TGSI_TEXTURE_BUFFER) {
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* Pad unused coordinate slots with undef. */
   for (i = dims; i < 3; i++) {
      coords[i] = coord_undef;
   }
   /* The layer index always ends up in the 3rd coordinate slot. */
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  TRUE,
                                  unit, unit,
                                  coords,
                                  offsets,
                                  NULL,
                                  NULL, explicit_lod, lod_property,
                                  texel);

   /* SAMPLE_I honors the sampler-view swizzle; skip when identity. */
   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
2128
/**
 * Emit a texture size query (TXQ or SVIEWINFO).
 *
 * \param sizes_out      receives the integer size vector(s)
 * \param is_sviewinfo   TRUE for SVIEWINFO (target comes from the
 *                       declared sampler view), FALSE for TXQ (target
 *                       encoded in the instruction)
 */
static void
emit_size_query( struct lp_build_tgsi_soa_context *bld,
                 const struct tgsi_full_instruction *inst,
                 LLVMValueRef *sizes_out,
                 boolean is_sviewinfo)
{
   LLVMValueRef explicit_lod;
   enum lp_sampler_lod_property lod_property;
   unsigned has_lod;
   unsigned i;
   unsigned unit = inst->Src[1].Register.Index;
   unsigned target, pipe_target;

   if (is_sviewinfo) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }
   /* Buffer and rect targets take no lod argument for the query. */
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      has_lod = 0;
      break;
   default:
      has_lod = 1;
      break;
   }

   /* Without a sampler generator nothing can be queried; return undefs. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++)
         sizes_out[i] = bld->bld_base.int_bld.undef;
      return;
   }

   if (has_lod) {
      /* lod is carried in src0.x */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else {
      explicit_lod = NULL;
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }


   pipe_target = tgsi_to_pipe_tex_target(target);

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 bld->bld_base.int_bld.type,
                                 unit, pipe_target,
                                 is_sviewinfo,
                                 lod_property,
                                 explicit_lod,
                                 sizes_out);
}
2187
2188 static boolean
2189 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2190 int pc)
2191 {
2192 int i;
2193
2194 for (i = 0; i < 5; i++) {
2195 unsigned opcode;
2196
2197 if (pc + i >= bld->bld_base.info->num_instructions)
2198 return TRUE;
2199
2200 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2201
2202 if (opcode == TGSI_OPCODE_END)
2203 return TRUE;
2204
2205 if (opcode == TGSI_OPCODE_TEX ||
2206 opcode == TGSI_OPCODE_TXP ||
2207 opcode == TGSI_OPCODE_TXD ||
2208 opcode == TGSI_OPCODE_TXB ||
2209 opcode == TGSI_OPCODE_TXL ||
2210 opcode == TGSI_OPCODE_TXF ||
2211 opcode == TGSI_OPCODE_TXQ ||
2212 opcode == TGSI_OPCODE_CAL ||
2213 opcode == TGSI_OPCODE_CALLNZ ||
2214 opcode == TGSI_OPCODE_IF ||
2215 opcode == TGSI_OPCODE_UIF ||
2216 opcode == TGSI_OPCODE_BGNLOOP ||
2217 opcode == TGSI_OPCODE_SWITCH)
2218 return FALSE;
2219 }
2220
2221 return TRUE;
2222 }
2223
2224
2225
/**
 * Kill fragment if any of the src register values are negative.
 *
 * Fetches the (swizzled) source channels of the KILL_IF instruction,
 * disables the lanes where any tested component is negative, and emits
 * an early-out mask check unless the shader ends soon anyway.
 *
 * \param pc  instruction counter of the KILL_IF itself
 */
static void
emit_kill_if(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[TGSI_NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < TGSI_NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   }

   mask = NULL;
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);

         /* AND the per-channel masks: a lane survives only if every
          * tested component is non-negative.
          */
         if(mask)
            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if(mask) {
      lp_build_mask_update(bld->mask, mask);

      /* Skip the all-dead early-out check near the end of the shader. */
      if (!near_end_of_shader(bld, pc))
         lp_build_mask_check(bld->mask);
   }
}
2280
2281
2282 /**
2283 * Unconditional fragment kill.
2284 * The only predication is the execution mask which will apply if
2285 * we're inside a loop or conditional.
2286 */
2287 static void
2288 emit_kill(struct lp_build_tgsi_soa_context *bld,
2289 int pc)
2290 {
2291 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2292 LLVMValueRef mask;
2293
2294 /* For those channels which are "alive", disable fragment shader
2295 * execution.
2296 */
2297 if (bld->exec_mask.has_mask) {
2298 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2299 }
2300 else {
2301 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2302 mask = zero;
2303 }
2304
2305 lp_build_mask_update(bld->mask, mask);
2306
2307 if (!near_end_of_shader(bld, pc))
2308 lp_build_mask_check(bld->mask);
2309 }
2310
2311
2312 /**
2313 * Emit code which will dump the value of all the temporary registers
2314 * to stdout.
2315 */
2316 static void
2317 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
2318 {
2319 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2320 LLVMBuilderRef builder = gallivm->builder;
2321 LLVMValueRef temp_ptr;
2322 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
2323 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
2324 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
2325 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
2326 int index;
2327 int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
2328
2329 for (index = 0; index < n; index++) {
2330 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
2331 LLVMValueRef v[4][4], res;
2332 int chan;
2333
2334 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
2335
2336 for (chan = 0; chan < 4; chan++) {
2337 temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2338 res = LLVMBuildLoad(builder, temp_ptr, "");
2339 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
2340 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
2341 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
2342 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
2343 }
2344
2345 lp_build_printf(gallivm, " X: %f %f %f %f\n",
2346 v[0][0], v[0][1], v[0][2], v[0][3]);
2347 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
2348 v[1][0], v[1][1], v[1][2], v[1][3]);
2349 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
2350 v[2][0], v[2][1], v[2][2], v[2][3]);
2351 lp_build_printf(gallivm, " W: %f %f %f %f\n",
2352 v[3][0], v[3][1], v[3][2], v[3][3]);
2353 }
2354 }
2355
2356
2357
/**
 * Process a TGSI declaration: allocate per-channel storage (allocas)
 * for the declared register range, or record sampler view info.
 *
 * Temporaries and outputs only get individual allocas when the file is
 * not indirectly addressed; otherwise they are backed by array storage
 * set up elsewhere (see bld->indirect_files).
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   for (idx = first; idx <= last; ++idx) {
      assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         assert(idx < LP_MAX_TGSI_TEMPS);
         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
         break;

      case TGSI_FILE_OUTPUT:
         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
         break;

      case TGSI_FILE_ADDRESS:
         /* ADDR registers are only allocated with an integer LLVM IR type,
          * as they are guaranteed to always have integers.
          * XXX: Not sure if this exception is worthwhile (or the whole idea of
          * an ADDR register for that matter).
          */
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
         break;

      case TGSI_FILE_PREDICATE:
         assert(idx < LP_MAX_TGSI_PREDS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
                                                 "predicate");
         break;

      case TGSI_FILE_SAMPLER_VIEW:
         /*
          * The target stored here MUST match whatever there actually
          * is in the set sampler views (what about return type?).
          */
         assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
         bld->sv[idx] = decl->SamplerView;
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }
}
2422
2423
/**
 * Record a TGSI immediate: build constant (splatted) vectors for each
 * declared component and store them in the next immediates[] slot.
 *
 * Non-float immediates are built in the matching int/uint type and
 * bitcast to the float vector type, since immediates[] is uniformly
 * float-typed.  When immediates are indirectly addressed the values
 * are additionally written into the imms_array backing store.
 */
void lp_emit_immediate_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_immediate *imm)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   /* simply copy the immediate values into the next immediates[] slot */
   unsigned i;
   const uint size = imm->Immediate.NrTokens - 1;
   assert(size <= 4);
   assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
   switch (imm->Immediate.DataType) {
   case TGSI_IMM_FLOAT32:
      for( i = 0; i < size; ++i )
         bld->immediates[bld->num_immediates][i] =
            lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);

      break;
   case TGSI_IMM_UINT32:
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
         bld->immediates[bld->num_immediates][i] =
            LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }

      break;
   case TGSI_IMM_INT32:
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
         bld->immediates[bld->num_immediates][i] =
            LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }

      break;
   }
   /* Pad undeclared trailing components with undef. */
   for( i = size; i < 4; ++i )
      bld->immediates[bld->num_immediates][i] = bld_base->base.undef;

   /* Indirectly-addressed immediates also live in the imms_array
    * backing store, laid out 4 vectors per immediate.
    */
   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      unsigned index = bld->num_immediates;
      struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
      LLVMBuilderRef builder = gallivm->builder;
      for (i = 0; i < 4; ++i ) {
         LLVMValueRef lindex = lp_build_const_int32(
            bld->bld_base.base.gallivm, index * 4 + i);
         LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
                                             bld->imms_array, &lindex, 1, "");
         LLVMBuildStore(builder,
                        bld->immediates[index][i],
                        imm_ptr);
      }
   }

   bld->num_immediates++;
}
2480
2481 static void
2482 ddx_emit(
2483 const struct lp_build_tgsi_action * action,
2484 struct lp_build_tgsi_context * bld_base,
2485 struct lp_build_emit_data * emit_data)
2486 {
2487 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2488
2489 emit_fetch_deriv(bld, emit_data->args[0], NULL,
2490 &emit_data->output[emit_data->chan], NULL);
2491 }
2492
2493 static void
2494 ddy_emit(
2495 const struct lp_build_tgsi_action * action,
2496 struct lp_build_tgsi_context * bld_base,
2497 struct lp_build_emit_data * emit_data)
2498 {
2499 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2500
2501 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
2502 &emit_data->output[emit_data->chan]);
2503 }
2504
2505 static void
2506 kill_emit(
2507 const struct lp_build_tgsi_action * action,
2508 struct lp_build_tgsi_context * bld_base,
2509 struct lp_build_emit_data * emit_data)
2510 {
2511 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2512
2513 emit_kill(bld, bld_base->pc - 1);
2514 }
2515
2516 static void
2517 kill_if_emit(
2518 const struct lp_build_tgsi_action * action,
2519 struct lp_build_tgsi_context * bld_base,
2520 struct lp_build_emit_data * emit_data)
2521 {
2522 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2523
2524 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
2525 }
2526
2527 static void
2528 tex_emit(
2529 const struct lp_build_tgsi_action * action,
2530 struct lp_build_tgsi_context * bld_base,
2531 struct lp_build_emit_data * emit_data)
2532 {
2533 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2534
2535 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
2536 }
2537
2538 static void
2539 txb_emit(
2540 const struct lp_build_tgsi_action * action,
2541 struct lp_build_tgsi_context * bld_base,
2542 struct lp_build_emit_data * emit_data)
2543 {
2544 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2545
2546 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2547 emit_data->output);
2548 }
2549
2550 static void
2551 txd_emit(
2552 const struct lp_build_tgsi_action * action,
2553 struct lp_build_tgsi_context * bld_base,
2554 struct lp_build_emit_data * emit_data)
2555 {
2556 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2557
2558 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2559 emit_data->output);
2560 }
2561
2562 static void
2563 txl_emit(
2564 const struct lp_build_tgsi_action * action,
2565 struct lp_build_tgsi_context * bld_base,
2566 struct lp_build_emit_data * emit_data)
2567 {
2568 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2569
2570 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2571 emit_data->output);
2572 }
2573
2574 static void
2575 txp_emit(
2576 const struct lp_build_tgsi_action * action,
2577 struct lp_build_tgsi_context * bld_base,
2578 struct lp_build_emit_data * emit_data)
2579 {
2580 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2581
2582 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
2583 emit_data->output);
2584 }
2585
2586 static void
2587 txq_emit(
2588 const struct lp_build_tgsi_action * action,
2589 struct lp_build_tgsi_context * bld_base,
2590 struct lp_build_emit_data * emit_data)
2591 {
2592 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2593
2594 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
2595 }
2596
2597 static void
2598 txf_emit(
2599 const struct lp_build_tgsi_action * action,
2600 struct lp_build_tgsi_context * bld_base,
2601 struct lp_build_emit_data * emit_data)
2602 {
2603 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2604
2605 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
2606 }
2607
2608 static void
2609 sample_i_emit(
2610 const struct lp_build_tgsi_action * action,
2611 struct lp_build_tgsi_context * bld_base,
2612 struct lp_build_emit_data * emit_data)
2613 {
2614 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2615
2616 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
2617 }
2618
2619 static void
2620 sample_emit(
2621 const struct lp_build_tgsi_action * action,
2622 struct lp_build_tgsi_context * bld_base,
2623 struct lp_build_emit_data * emit_data)
2624 {
2625 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2626
2627 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2628 FALSE, emit_data->output);
2629 }
2630
2631 static void
2632 sample_b_emit(
2633 const struct lp_build_tgsi_action * action,
2634 struct lp_build_tgsi_context * bld_base,
2635 struct lp_build_emit_data * emit_data)
2636 {
2637 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2638
2639 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2640 FALSE, emit_data->output);
2641 }
2642
2643 static void
2644 sample_c_emit(
2645 const struct lp_build_tgsi_action * action,
2646 struct lp_build_tgsi_context * bld_base,
2647 struct lp_build_emit_data * emit_data)
2648 {
2649 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2650
2651 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2652 TRUE, emit_data->output);
2653 }
2654
2655 static void
2656 sample_c_lz_emit(
2657 const struct lp_build_tgsi_action * action,
2658 struct lp_build_tgsi_context * bld_base,
2659 struct lp_build_emit_data * emit_data)
2660 {
2661 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2662
2663 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
2664 TRUE, emit_data->output);
2665 }
2666
2667 static void
2668 sample_d_emit(
2669 const struct lp_build_tgsi_action * action,
2670 struct lp_build_tgsi_context * bld_base,
2671 struct lp_build_emit_data * emit_data)
2672 {
2673 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2674
2675 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2676 FALSE, emit_data->output);
2677 }
2678
2679 static void
2680 sample_l_emit(
2681 const struct lp_build_tgsi_action * action,
2682 struct lp_build_tgsi_context * bld_base,
2683 struct lp_build_emit_data * emit_data)
2684 {
2685 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2686
2687 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2688 FALSE, emit_data->output);
2689 }
2690
2691 static void
2692 sviewinfo_emit(
2693 const struct lp_build_tgsi_action * action,
2694 struct lp_build_tgsi_context * bld_base,
2695 struct lp_build_emit_data * emit_data)
2696 {
2697 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2698
2699 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
2700 }
2701
2702 static LLVMValueRef
2703 mask_vec(struct lp_build_tgsi_context *bld_base)
2704 {
2705 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2706 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2707 struct lp_exec_mask *exec_mask = &bld->exec_mask;
2708
2709 if (!exec_mask->has_mask) {
2710 return lp_build_mask_value(bld->mask);
2711 }
2712 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
2713 exec_mask->exec_mask, "");
2714 }
2715
2716 static void
2717 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
2718 LLVMValueRef ptr,
2719 LLVMValueRef mask)
2720 {
2721 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2722 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2723
2724 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
2725
2726 LLVMBuildStore(builder, current_vec, ptr);
2727 }
2728
2729 static void
2730 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
2731 LLVMValueRef ptr,
2732 LLVMValueRef mask)
2733 {
2734 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2735 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2736
2737 current_vec = lp_build_select(&bld_base->uint_bld,
2738 mask,
2739 bld_base->uint_bld.zero,
2740 current_vec);
2741
2742 LLVMBuildStore(builder, current_vec, ptr);
2743 }
2744
2745 static LLVMValueRef
2746 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
2747 LLVMValueRef current_mask_vec,
2748 LLVMValueRef total_emitted_vertices_vec)
2749 {
2750 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2751 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
2752 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
2753 total_emitted_vertices_vec,
2754 bld->max_output_vertices_vec);
2755
2756 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
2757 }
2758
/**
 * EMIT (geometry shaders): push the current output registers out as a
 * new vertex for every lane still allowed to emit, then bump the
 * per-primitive and total vertex counters under that mask.
 */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      /* Lanes already at max_output_vertices must not emit any more. */
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      /* Masked increment of both vertex counters. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
2792
2793
/**
 * Finish the current geometry-shader primitive for the lanes selected
 * by \p mask that actually have unflushed vertices: notify the GS
 * interface, bump the primitive counter, and reset the per-primitive
 * vertex counter (both masked).
 */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      /* One more primitive for the lanes that just flushed vertices,
       * then restart their per-primitive vertex count at zero.
       */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
2846
2847 static void
2848 end_primitive(
2849 const struct lp_build_tgsi_action * action,
2850 struct lp_build_tgsi_context * bld_base,
2851 struct lp_build_emit_data * emit_data)
2852 {
2853 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2854
2855 if (bld->gs_iface->end_primitive) {
2856 LLVMValueRef mask = mask_vec(bld_base);
2857 end_primitive_masked(bld_base, mask);
2858 }
2859 }
2860
2861 static void
2862 cal_emit(
2863 const struct lp_build_tgsi_action * action,
2864 struct lp_build_tgsi_context * bld_base,
2865 struct lp_build_emit_data * emit_data)
2866 {
2867 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2868
2869 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
2870 &bld_base->pc);
2871 }
2872
2873 static void
2874 ret_emit(
2875 const struct lp_build_tgsi_action * action,
2876 struct lp_build_tgsi_context * bld_base,
2877 struct lp_build_emit_data * emit_data)
2878 {
2879 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2880
2881 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
2882 }
2883
2884 static void
2885 brk_emit(
2886 const struct lp_build_tgsi_action * action,
2887 struct lp_build_tgsi_context * bld_base,
2888 struct lp_build_emit_data * emit_data)
2889 {
2890 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2891
2892 lp_exec_break(&bld->exec_mask, bld_base);
2893 }
2894
2895 static void
2896 breakc_emit(
2897 const struct lp_build_tgsi_action * action,
2898 struct lp_build_tgsi_context * bld_base,
2899 struct lp_build_emit_data * emit_data)
2900 {
2901 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2902 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2903 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2904 LLVMValueRef unsigned_cond =
2905 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
2906 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2907 unsigned_cond,
2908 uint_bld->zero);
2909
2910 lp_exec_break_condition(&bld->exec_mask, cond);
2911 }
2912
2913 static void
2914 if_emit(
2915 const struct lp_build_tgsi_action * action,
2916 struct lp_build_tgsi_context * bld_base,
2917 struct lp_build_emit_data * emit_data)
2918 {
2919 LLVMValueRef tmp;
2920 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2921
2922 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
2923 emit_data->args[0], bld->bld_base.base.zero);
2924 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
2925 }
2926
2927 static void
2928 uif_emit(
2929 const struct lp_build_tgsi_action * action,
2930 struct lp_build_tgsi_context * bld_base,
2931 struct lp_build_emit_data * emit_data)
2932 {
2933 LLVMValueRef tmp;
2934 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2935 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2936
2937 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2938 emit_data->args[0], uint_bld->zero);
2939 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
2940 }
2941
2942 static void
2943 case_emit(
2944 const struct lp_build_tgsi_action * action,
2945 struct lp_build_tgsi_context * bld_base,
2946 struct lp_build_emit_data * emit_data)
2947 {
2948 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2949
2950 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
2951 }
2952
2953 static void
2954 default_emit(
2955 const struct lp_build_tgsi_action * action,
2956 struct lp_build_tgsi_context * bld_base,
2957 struct lp_build_emit_data * emit_data)
2958 {
2959 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2960
2961 lp_exec_default(&bld->exec_mask, bld_base);
2962 }
2963
2964 static void
2965 switch_emit(
2966 const struct lp_build_tgsi_action * action,
2967 struct lp_build_tgsi_context * bld_base,
2968 struct lp_build_emit_data * emit_data)
2969 {
2970 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2971
2972 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
2973 }
2974
2975 static void
2976 endswitch_emit(
2977 const struct lp_build_tgsi_action * action,
2978 struct lp_build_tgsi_context * bld_base,
2979 struct lp_build_emit_data * emit_data)
2980 {
2981 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2982
2983 lp_exec_endswitch(&bld->exec_mask, bld_base);
2984 }
2985
2986 static void
2987 bgnloop_emit(
2988 const struct lp_build_tgsi_action * action,
2989 struct lp_build_tgsi_context * bld_base,
2990 struct lp_build_emit_data * emit_data)
2991 {
2992 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2993
2994 lp_exec_bgnloop(&bld->exec_mask);
2995 }
2996
2997 static void
2998 bgnsub_emit(
2999 const struct lp_build_tgsi_action * action,
3000 struct lp_build_tgsi_context * bld_base,
3001 struct lp_build_emit_data * emit_data)
3002 {
3003 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3004
3005 lp_exec_mask_bgnsub(&bld->exec_mask);
3006 }
3007
3008 static void
3009 else_emit(
3010 const struct lp_build_tgsi_action * action,
3011 struct lp_build_tgsi_context * bld_base,
3012 struct lp_build_emit_data * emit_data)
3013 {
3014 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3015
3016 lp_exec_mask_cond_invert(&bld->exec_mask);
3017 }
3018
3019 static void
3020 endif_emit(
3021 const struct lp_build_tgsi_action * action,
3022 struct lp_build_tgsi_context * bld_base,
3023 struct lp_build_emit_data * emit_data)
3024 {
3025 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3026
3027 lp_exec_mask_cond_pop(&bld->exec_mask);
3028 }
3029
3030 static void
3031 endloop_emit(
3032 const struct lp_build_tgsi_action * action,
3033 struct lp_build_tgsi_context * bld_base,
3034 struct lp_build_emit_data * emit_data)
3035 {
3036 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3037
3038 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3039 }
3040
3041 static void
3042 endsub_emit(
3043 const struct lp_build_tgsi_action * action,
3044 struct lp_build_tgsi_context * bld_base,
3045 struct lp_build_emit_data * emit_data)
3046 {
3047 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3048
3049 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3050 }
3051
3052 static void
3053 cont_emit(
3054 const struct lp_build_tgsi_action * action,
3055 struct lp_build_tgsi_context * bld_base,
3056 struct lp_build_emit_data * emit_data)
3057 {
3058 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3059
3060 lp_exec_continue(&bld->exec_mask);
3061 }
3062
3063 /* XXX: Refactor and move it to lp_bld_tgsi_action.c
3064 *
3065 * XXX: What do the comments about xmm registers mean? Maybe they are left over
3066 * from old code, but there is no garauntee that LLVM will use those registers
3067 * for this code.
3068 *
3069 * XXX: There should be no calls to lp_build_emit_fetch in this function. This
3070 * should be handled by the emit_data->fetch_args function. */
3071 static void
3072 nrm_emit(
3073 const struct lp_build_tgsi_action * action,
3074 struct lp_build_tgsi_context * bld_base,
3075 struct lp_build_emit_data * emit_data)
3076 {
3077 LLVMValueRef tmp0, tmp1;
3078 LLVMValueRef tmp4 = NULL;
3079 LLVMValueRef tmp5 = NULL;
3080 LLVMValueRef tmp6 = NULL;
3081 LLVMValueRef tmp7 = NULL;
3082 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3083
3084 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
3085
3086 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
3087 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
3088 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
3089 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
3090
3091 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
3092
3093 /* xmm4 = src.x */
3094 /* xmm0 = src.x * src.x */
3095 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
3096 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
3097 tmp4 = tmp0;
3098 }
3099 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
3100
3101 /* xmm5 = src.y */
3102 /* xmm0 = xmm0 + src.y * src.y */
3103 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
3104 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
3105 tmp5 = tmp1;
3106 }
3107 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
3108 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
3109
3110 /* xmm6 = src.z */
3111 /* xmm0 = xmm0 + src.z * src.z */
3112 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
3113 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
3114 tmp6 = tmp1;
3115 }
3116 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
3117 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
3118
3119 if (dims == 4) {
3120 /* xmm7 = src.w */
3121 /* xmm0 = xmm0 + src.w * src.w */
3122 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
3123 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
3124 tmp7 = tmp1;
3125 }
3126 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
3127 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
3128 }
3129 /* xmm1 = 1 / sqrt(xmm0) */
3130 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
3131 /* dst.x = xmm1 * src.x */
3132 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
3133 emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
3134 }
3135 /* dst.y = xmm1 * src.y */
3136 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
3137 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
3138 }
3139
3140 /* dst.z = xmm1 * src.z */
3141 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
3142 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
3143 }
3144 /* dst.w = xmm1 * src.w */
3145 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
3146 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
3147 }
3148 }
3149
3150 /* dst.w = 1.0 */
3151 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
3152 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
3153 }
3154 }
3155
/**
 * Emitted once before the shader body is translated.
 *
 * Creates the alloca-backed register arrays needed when a register file is
 * addressed indirectly (the "file_max * 4 + 4" sizing allocates 4 channels
 * per register, including the highest-referenced register), copies direct
 * inputs into the input array when inputs are indirectly addressed, and
 * zero-initializes the geometry-shader vertex/primitive counters.
 */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   /* Temporaries addressed via ADDR registers must live in memory. */
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "temp_array");
   }

   /* Likewise for indirectly addressed outputs. */
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                bld_base->base.vec_type, array_size,
                                                "output_array");
   }

   /* Likewise for indirectly addressed immediates. */
   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   /* GS inputs are fetched through the gs_iface instead, so skip then. */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      /* Store each caller-provided input channel at slot index*4 + chan. */
      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   /* Geometry shaders track per-channel emitted vertex/primitive counts. */
   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      /* All counters start at zero. */
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }
}
3239
/**
 * Emitted once after the shader body has been translated.
 *
 * For geometry shaders: flush any unflushed vertices with an implicit
 * end-of-primitive, then report the final vertex/primitive counts through
 * the GS interface.  For other shaders: copy the (possibly alloca-backed)
 * outputs into the caller-visible output slots.
 */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (0) {
      /* for debugging */
      emit_dump_temps(bld);
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the caller */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* implicit end_primitives, needed in case there are any unflushed
         vertices in the cache. Note must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      /* Load the final counter values accumulated during the shader body. */
      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      gather_outputs(bld);
   }
}
3273
/**
 * Translate a TGSI token stream to LLVM IR using structure-of-arrays
 * (per-channel vector) code generation.
 *
 * Sets up the SoA build context (float/uint/int/element build contexts,
 * fetch/store callbacks, prologue/epilogue hooks and per-opcode actions),
 * then walks the token stream via lp_build_tgsi_llvm().
 *
 * \param gallivm        gallivm/LLVM state to emit IR into
 * \param tokens         TGSI token stream to translate
 * \param type           vector type for shader values (width/length)
 * \param mask           execution mask (may be clobbered by KILL etc.)
 * \param consts_ptr     pointer to the constant buffer
 * \param system_values  system value registers (copied into the context)
 * \param inputs         caller-provided input channel values
 * \param outputs        caller-provided output slots to fill
 * \param sampler        texture sampling code generator
 * \param info           shader info (register usage, properties, ...)
 * \param gs_iface       geometry shader interface, or NULL for non-GS
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* Signed integer type with the same width/length as the shader type. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;

   /* Register fetch/store callbacks for each register file. */
   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   /* Control flow, derivative, kill and texture opcodes need SoA-specific
    * handling; override the defaults for them. */
   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;

   if (gs_iface) {
      /* There's no specific value for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often
       * were forgetting so we're using MAX_VERTEX_VARYING from
       * that spec even though we could debug_assert if it's not
       * set, but that's a lot uglier. */
      uint max_output_vertices = 32;
      uint i = 0;
      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      /* Pick up the shader-declared max output vertex count, if present. */
      for (i = 0; i < info->num_properties; ++i) {
         if (info->properties[i].name ==
             TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
            max_output_vertices = info->properties[i].data[0];
         }
      }
      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   /* Walk the token stream and emit the IR. */
   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      /* Debug: dump the TGSI and the generated function. */
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      /* Debug: dump the whole module. */
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);

   }
}