affe0592a44e0d2c74ff398e60126286e6f62070
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_tgsi_action.h"
51 #include "lp_bld_type.h"
52 #include "lp_bld_const.h"
53 #include "lp_bld_arit.h"
54 #include "lp_bld_bitarit.h"
55 #include "lp_bld_gather.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_logic.h"
58 #include "lp_bld_swizzle.h"
59 #include "lp_bld_flow.h"
60 #include "lp_bld_quad.h"
61 #include "lp_bld_tgsi.h"
62 #include "lp_bld_limits.h"
63 #include "lp_bld_debug.h"
64 #include "lp_bld_printf.h"
65 #include "lp_bld_sample.h"
66 #include "lp_bld_struct.h"
67
68 #define DUMP_GS_EMITS 0
69
70 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
71 {
72 LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
73 LLVMBuilderRef builder = bld->gallivm->builder;
74
75 mask->bld = bld;
76 mask->has_mask = FALSE;
77 mask->ret_in_main = FALSE;
78 mask->cond_stack_size = 0;
79 mask->loop_stack_size = 0;
80 mask->call_stack_size = 0;
81 mask->switch_stack_size = 0;
82
83 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
84 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
85 mask->cond_mask = mask->switch_mask =
86 LLVMConstAllOnes(mask->int_vec_type);
87
88 mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
89
90 LLVMBuildStore(
91 builder,
92 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
93 mask->loop_limiter);
94 }
95
96 static void lp_exec_mask_update(struct lp_exec_mask *mask)
97 {
98 LLVMBuilderRef builder = mask->bld->gallivm->builder;
99
100 if (mask->loop_stack_size) {
101 /*for loops we need to update the entire mask at runtime */
102 LLVMValueRef tmp;
103 assert(mask->break_mask);
104 tmp = LLVMBuildAnd(builder,
105 mask->cont_mask,
106 mask->break_mask,
107 "maskcb");
108 mask->exec_mask = LLVMBuildAnd(builder,
109 mask->cond_mask,
110 tmp,
111 "maskfull");
112 } else
113 mask->exec_mask = mask->cond_mask;
114
115 if (mask->switch_stack_size) {
116 mask->exec_mask = LLVMBuildAnd(builder,
117 mask->exec_mask,
118 mask->switch_mask,
119 "switchmask");
120 }
121
122 if (mask->call_stack_size || mask->ret_in_main) {
123 mask->exec_mask = LLVMBuildAnd(builder,
124 mask->exec_mask,
125 mask->ret_mask,
126 "callmask");
127 }
128
129 mask->has_mask = (mask->cond_stack_size > 0 ||
130 mask->loop_stack_size > 0 ||
131 mask->call_stack_size > 0 ||
132 mask->switch_stack_size > 0 ||
133 mask->ret_in_main);
134 }
135
136 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
137 LLVMValueRef val)
138 {
139 LLVMBuilderRef builder = mask->bld->gallivm->builder;
140
141 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
142 if (mask->cond_stack_size == 0) {
143 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
144 }
145 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
146 assert(LLVMTypeOf(val) == mask->int_vec_type);
147 mask->cond_mask = LLVMBuildAnd(builder,
148 mask->cond_mask,
149 val,
150 "");
151 lp_exec_mask_update(mask);
152 }
153
154 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
155 {
156 LLVMBuilderRef builder = mask->bld->gallivm->builder;
157 LLVMValueRef prev_mask;
158 LLVMValueRef inv_mask;
159
160 assert(mask->cond_stack_size);
161 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
162 if (mask->cond_stack_size == 1) {
163 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
164 }
165
166 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
167
168 mask->cond_mask = LLVMBuildAnd(builder,
169 inv_mask,
170 prev_mask, "");
171 lp_exec_mask_update(mask);
172 }
173
174 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
175 {
176 assert(mask->cond_stack_size);
177 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
178 lp_exec_mask_update(mask);
179 }
180
181 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
182 {
183 LLVMBuilderRef builder = mask->bld->gallivm->builder;
184
185 if (mask->loop_stack_size == 0) {
186 assert(mask->loop_block == NULL);
187 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
188 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
189 assert(mask->break_var == NULL);
190 }
191
192 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
193
194 mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
195 mask->break_type;
196 mask->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
197
198 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
199 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
200 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
201 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
202 ++mask->loop_stack_size;
203
204 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
205 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
206
207 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
208
209 LLVMBuildBr(builder, mask->loop_block);
210 LLVMPositionBuilderAtEnd(builder, mask->loop_block);
211
212 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
213
214 lp_exec_mask_update(mask);
215 }
216
217 static void lp_exec_break(struct lp_exec_mask *mask,
218 struct lp_build_tgsi_context * bld_base)
219 {
220 LLVMBuilderRef builder = mask->bld->gallivm->builder;
221
222 if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
223 LLVMValueRef exec_mask = LLVMBuildNot(builder,
224 mask->exec_mask,
225 "break");
226
227 mask->break_mask = LLVMBuildAnd(builder,
228 mask->break_mask,
229 exec_mask, "break_full");
230 }
231 else {
232 unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
233 boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
234 opcode == TGSI_OPCODE_CASE);
235
236
237 if (mask->switch_in_default) {
238 /*
239 * stop default execution but only if this is an unconditional switch.
240 * (The condition here is not perfect since dead code after break is
241 * allowed but should be sufficient since false negatives are just
242 * unoptimized - so we don't have to pre-evaluate that).
243 */
244 if(break_always && mask->switch_pc) {
245 bld_base->pc = mask->switch_pc;
246 return;
247 }
248 }
249
250 if (break_always) {
251 mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
252 }
253 else {
254 LLVMValueRef exec_mask = LLVMBuildNot(builder,
255 mask->exec_mask,
256 "break");
257 mask->switch_mask = LLVMBuildAnd(builder,
258 mask->switch_mask,
259 exec_mask, "break_switch");
260 }
261 }
262
263 lp_exec_mask_update(mask);
264 }
265
266 static void lp_exec_break_condition(struct lp_exec_mask *mask,
267 LLVMValueRef cond)
268 {
269 LLVMBuilderRef builder = mask->bld->gallivm->builder;
270 LLVMValueRef cond_mask = LLVMBuildAnd(builder,
271 mask->exec_mask,
272 cond, "cond_mask");
273 cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
274
275 if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
276 mask->break_mask = LLVMBuildAnd(builder,
277 mask->break_mask,
278 cond_mask, "breakc_full");
279 }
280 else {
281 mask->switch_mask = LLVMBuildAnd(builder,
282 mask->switch_mask,
283 cond_mask, "breakc_switch");
284 }
285
286 lp_exec_mask_update(mask);
287 }
288
289 static void lp_exec_continue(struct lp_exec_mask *mask)
290 {
291 LLVMBuilderRef builder = mask->bld->gallivm->builder;
292 LLVMValueRef exec_mask = LLVMBuildNot(builder,
293 mask->exec_mask,
294 "");
295
296 mask->cont_mask = LLVMBuildAnd(builder,
297 mask->cont_mask,
298 exec_mask, "");
299
300 lp_exec_mask_update(mask);
301 }
302
303
304 static void lp_exec_endloop(struct gallivm_state *gallivm,
305 struct lp_exec_mask *mask)
306 {
307 LLVMBuilderRef builder = mask->bld->gallivm->builder;
308 LLVMBasicBlockRef endloop;
309 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
310 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
311 mask->bld->type.width *
312 mask->bld->type.length);
313 LLVMValueRef i1cond, i2cond, icond, limiter;
314
315 assert(mask->break_mask);
316
317 /*
318 * Restore the cont_mask, but don't pop
319 */
320 assert(mask->loop_stack_size);
321 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
322 lp_exec_mask_update(mask);
323
324 /*
325 * Unlike the continue mask, the break_mask must be preserved across loop
326 * iterations
327 */
328 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
329
330 /* Decrement the loop limiter */
331 limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
332
333 limiter = LLVMBuildSub(
334 builder,
335 limiter,
336 LLVMConstInt(int_type, 1, false),
337 "");
338
339 LLVMBuildStore(builder, limiter, mask->loop_limiter);
340
341 /* i1cond = (mask != 0) */
342 i1cond = LLVMBuildICmp(
343 builder,
344 LLVMIntNE,
345 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
346 LLVMConstNull(reg_type), "i1cond");
347
348 /* i2cond = (looplimiter > 0) */
349 i2cond = LLVMBuildICmp(
350 builder,
351 LLVMIntSGT,
352 limiter,
353 LLVMConstNull(int_type), "i2cond");
354
355 /* if( i1cond && i2cond ) */
356 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
357
358 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
359
360 LLVMBuildCondBr(builder,
361 icond, mask->loop_block, endloop);
362
363 LLVMPositionBuilderAtEnd(builder, endloop);
364
365 assert(mask->loop_stack_size);
366 --mask->loop_stack_size;
367 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
368 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
369 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
370 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
371 mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
372
373 lp_exec_mask_update(mask);
374 }
375
376 static void lp_exec_switch(struct lp_exec_mask *mask,
377 LLVMValueRef switchval)
378 {
379 mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
380 mask->break_type;
381 mask->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
382
383 mask->switch_stack[mask->switch_stack_size].switch_val = mask->switch_val;
384 mask->switch_stack[mask->switch_stack_size].switch_mask = mask->switch_mask;
385 mask->switch_stack[mask->switch_stack_size].switch_mask_default = mask->switch_mask_default;
386 mask->switch_stack[mask->switch_stack_size].switch_in_default = mask->switch_in_default;
387 mask->switch_stack[mask->switch_stack_size].switch_pc = mask->switch_pc;
388 mask->switch_stack_size++;
389
390 mask->switch_val = switchval;
391 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
392 mask->switch_mask_default = LLVMConstNull(mask->int_vec_type);
393 mask->switch_in_default = false;
394 mask->switch_pc = 0;
395
396 lp_exec_mask_update(mask);
397 }
398
399 static void lp_exec_endswitch(struct lp_exec_mask *mask,
400 struct lp_build_tgsi_context * bld_base)
401 {
402 LLVMBuilderRef builder = mask->bld->gallivm->builder;
403
404 /* check if there's deferred default if so do it now */
405 if (mask->switch_pc && !mask->switch_in_default) {
406 LLVMValueRef prevmask, defaultmask;
407 unsigned tmp_pc;
408 prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
409 defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
410 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
411 mask->switch_in_default = true;
412
413 lp_exec_mask_update(mask);
414
415 assert(bld_base->instructions[mask->switch_pc - 1].Instruction.Opcode ==
416 TGSI_OPCODE_DEFAULT);
417
418 tmp_pc = bld_base->pc;
419 bld_base->pc = mask->switch_pc;
420 /*
421 * re-purpose switch_pc to point to here again, since we stop execution of
422 * the deferred default after next break.
423 */
424 mask->switch_pc = tmp_pc - 1;
425
426 return;
427 }
428
429 else if (mask->switch_pc && mask->switch_in_default) {
430 assert(bld_base->pc == mask->switch_pc + 1);
431 }
432
433 mask->switch_stack_size--;
434 mask->switch_val = mask->switch_stack[mask->switch_stack_size].switch_val;
435 mask->switch_mask = mask->switch_stack[mask->switch_stack_size].switch_mask;
436 mask->switch_mask_default = mask->switch_stack[mask->switch_stack_size].switch_mask_default;
437 mask->switch_in_default = mask->switch_stack[mask->switch_stack_size].switch_in_default;
438 mask->switch_pc = mask->switch_stack[mask->switch_stack_size].switch_pc;
439
440 mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
441
442 lp_exec_mask_update(mask);
443 }
444
445 static void lp_exec_case(struct lp_exec_mask *mask,
446 LLVMValueRef caseval)
447 {
448 LLVMBuilderRef builder = mask->bld->gallivm->builder;
449
450 LLVMValueRef casemask, prevmask;
451
452 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
453 if (!mask->switch_in_default) {
454 prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
455 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, mask->switch_val);
456 mask->switch_mask_default = LLVMBuildOr(builder, casemask,
457 mask->switch_mask_default, "sw_default_mask");
458 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
459 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
460
461 lp_exec_mask_update(mask);
462 }
463 }
464
465 /*
466 * Analyse default statement in a switch.
467 * \return true if default is last statement, false otherwise
468 * \param default_pc_start contains pc of instruction to jump to
469 * if default wasn't last but there's no
470 * fallthrough into default.
471 */
472 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
473 struct lp_build_tgsi_context * bld_base,
474 int *default_pc_start)
475 {
476 unsigned pc = bld_base->pc;
477 unsigned curr_switch_stack = mask->switch_stack_size;
478
479 /* skip over case statements which are together with default */
480 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
481 pc++;
482 }
483
484 while (pc != -1 && pc < bld_base->num_instructions) {
485 unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
486 switch (opcode) {
487 case TGSI_OPCODE_CASE:
488 if (curr_switch_stack == mask->switch_stack_size) {
489 *default_pc_start = pc - 1;
490 return false;
491 }
492 break;
493 case TGSI_OPCODE_SWITCH:
494 curr_switch_stack++;
495 break;
496 case TGSI_OPCODE_ENDSWITCH:
497 if (curr_switch_stack == mask->switch_stack_size) {
498 *default_pc_start = pc - 1;
499 return true;
500 }
501 curr_switch_stack--;
502 break;
503 }
504 pc++;
505 }
506 /* should never arrive here */
507 assert(0);
508 return true;
509 }
510
511 static void lp_exec_default(struct lp_exec_mask *mask,
512 struct lp_build_tgsi_context * bld_base)
513 {
514 LLVMBuilderRef builder = mask->bld->gallivm->builder;
515
516 int default_exec_pc;
517 boolean default_is_last;
518
519 /*
520 * This is a messy opcode, because it may not be always at the end and
521 * there can be fallthrough in and out of it.
522 */
523
524 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
525 /*
526 * If it is last statement in switch (note that case statements appearing
527 * "at the same time" as default don't change that) everything is just fine,
528 * update switch mask and go on. This means we can handle default with
529 * fallthrough INTO it without overhead, if it is last.
530 */
531 if (default_is_last) {
532 LLVMValueRef prevmask, defaultmask;
533 prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
534 defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
535 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
536 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
537 mask->switch_in_default = true;
538
539 lp_exec_mask_update(mask);
540 }
541 else {
542 /*
543 * Technically, "case" immediately before default isn't really a
544 * fallthrough, however we still have to count them as such as we
545 * already have updated the masks.
546 * If that happens in practice could add a switch optimizer pass
547 * which just gets rid of all case statements appearing together with
548 * default (or could do switch analysis at switch start time instead).
549 */
550 unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
551 boolean ft_into = (opcode != TGSI_OPCODE_BRK ||
552 opcode != TGSI_OPCODE_SWITCH);
553 /*
554 * If it is not last statement and there was no fallthrough into it,
555 * we record the PC and continue execution at next case (again, those
556 * case encountered at the same time don't count). At endswitch
557 * time, we update switchmask, and go back executing the code we skipped
558 * until the next break (possibly re-executing some code with changed mask
559 * if there was a fallthrough out of default).
560 * Finally, if it is not last statement and there was a fallthrough into it,
561 * do the same as with the former case, except instead of skipping the code
562 * just execute it without updating the mask, then go back and re-execute.
563 */
564 mask->switch_pc = bld_base->pc;
565 if (!ft_into) {
566 bld_base->pc = default_exec_pc;
567 }
568 }
569 }
570
571
572 /* stores val into an address pointed to by dst_ptr.
573 * mask->exec_mask is used to figure out which bits of val
574 * should be stored into the address
575 * (0 means don't store this bit, 1 means do store).
576 */
577 static void lp_exec_mask_store(struct lp_exec_mask *mask,
578 struct lp_build_context *bld_store,
579 LLVMValueRef pred,
580 LLVMValueRef val,
581 LLVMValueRef dst_ptr)
582 {
583 LLVMBuilderRef builder = mask->bld->gallivm->builder;
584
585 assert(lp_check_value(bld_store->type, val));
586 assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
587 assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
588
589 /* Mix the predicate and execution mask */
590 if (mask->has_mask) {
591 if (pred) {
592 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
593 } else {
594 pred = mask->exec_mask;
595 }
596 }
597
598 if (pred) {
599 LLVMValueRef res, dst;
600
601 dst = LLVMBuildLoad(builder, dst_ptr, "");
602 res = lp_build_select(bld_store, pred, val, dst);
603 LLVMBuildStore(builder, res, dst_ptr);
604 } else
605 LLVMBuildStore(builder, val, dst_ptr);
606 }
607
608 static void lp_exec_mask_call(struct lp_exec_mask *mask,
609 int func,
610 int *pc)
611 {
612 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
613 mask->call_stack[mask->call_stack_size].pc = *pc;
614 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
615 mask->call_stack_size++;
616 *pc = func;
617 }
618
619 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
620 {
621 LLVMBuilderRef builder = mask->bld->gallivm->builder;
622 LLVMValueRef exec_mask;
623
624 if (mask->cond_stack_size == 0 &&
625 mask->loop_stack_size == 0 &&
626 mask->switch_stack_size == 0 &&
627 mask->call_stack_size == 0) {
628 /* returning from main() */
629 *pc = -1;
630 return;
631 }
632
633 if (mask->call_stack_size == 0) {
634 /*
635 * This requires special handling since we need to ensure
636 * we don't drop the mask even if we have no call stack
637 * (e.g. after a ret in a if clause after the endif)
638 */
639 mask->ret_in_main = TRUE;
640 }
641
642 exec_mask = LLVMBuildNot(builder,
643 mask->exec_mask,
644 "ret");
645
646 mask->ret_mask = LLVMBuildAnd(builder,
647 mask->ret_mask,
648 exec_mask, "ret_full");
649
650 lp_exec_mask_update(mask);
651 }
652
/**
 * Handle BGNSUB.  Intentionally does nothing: no mask state changes when a
 * subroutine body begins.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
656
657 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
658 {
659 assert(mask->call_stack_size);
660 mask->call_stack_size--;
661 *pc = mask->call_stack[mask->call_stack_size].pc;
662 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
663 lp_exec_mask_update(mask);
664 }
665
666
667 /**
668 * Return pointer to a temporary register channel (src or dest).
669 * Note that indirect addressing cannot be handled here.
670 * \param index which temporary register
671 * \param chan which channel of the temp register.
672 */
673 LLVMValueRef
674 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
675 unsigned index,
676 unsigned chan)
677 {
678 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
679 assert(chan < 4);
680 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
681 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
682 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
683 }
684 else {
685 return bld->temps[index][chan];
686 }
687 }
688
689 /**
690 * Return pointer to a output register channel (src or dest).
691 * Note that indirect addressing cannot be handled here.
692 * \param index which output register
693 * \param chan which channel of the output register.
694 */
695 LLVMValueRef
696 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
697 unsigned index,
698 unsigned chan)
699 {
700 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
701 assert(chan < 4);
702 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
703 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
704 index * 4 + chan);
705 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
706 }
707 else {
708 return bld->outputs[index][chan];
709 }
710 }
711
712 /*
713 * If we have indirect addressing in outputs copy our alloca array
714 * to the outputs slots specified by the caller to make sure
715 * our outputs are delivered consistently via the same interface.
716 */
717 static void
718 gather_outputs(struct lp_build_tgsi_soa_context * bld)
719 {
720 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
721 unsigned index, chan;
722 assert(bld->bld_base.info->num_outputs <=
723 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
724 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
725 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
726 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
727 }
728 }
729 }
730 }
731
732 /**
733 * Gather vector.
734 * XXX the lp_build_gather() function should be capable of doing this
735 * with a little work.
736 */
737 static LLVMValueRef
738 build_gather(struct lp_build_context *bld,
739 LLVMValueRef base_ptr,
740 LLVMValueRef indexes)
741 {
742 LLVMBuilderRef builder = bld->gallivm->builder;
743 LLVMValueRef res = bld->undef;
744 unsigned i;
745
746 /*
747 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
748 */
749 for (i = 0; i < bld->type.length; i++) {
750 LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
751 LLVMValueRef index = LLVMBuildExtractElement(builder,
752 indexes, ii, "");
753 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
754 &index, 1, "gather_ptr");
755 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
756
757 res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
758 }
759
760 return res;
761 }
762
763
764 /**
765 * Scatter/store vector.
766 */
767 static void
768 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
769 LLVMValueRef base_ptr,
770 LLVMValueRef indexes,
771 LLVMValueRef values,
772 struct lp_exec_mask *mask,
773 LLVMValueRef pred)
774 {
775 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
776 LLVMBuilderRef builder = gallivm->builder;
777 unsigned i;
778
779 /* Mix the predicate and execution mask */
780 if (mask->has_mask) {
781 if (pred) {
782 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
783 }
784 else {
785 pred = mask->exec_mask;
786 }
787 }
788
789 /*
790 * Loop over elements of index_vec, store scalar value.
791 */
792 for (i = 0; i < bld->bld_base.base.type.length; i++) {
793 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
794 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
795 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
796 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
797 LLVMValueRef scalar_pred = pred ?
798 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
799
800 if (0)
801 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
802 ii, val, index, scalar_ptr);
803
804 if (scalar_pred) {
805 LLVMValueRef real_val, dst_val;
806 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
807 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
808 LLVMBuildStore(builder, real_val, scalar_ptr);
809 }
810 else {
811 LLVMBuildStore(builder, val, scalar_ptr);
812 }
813 }
814 }
815
816
817 /**
818 * Read the current value of the ADDR register, convert the floats to
819 * ints, add the base index and return the vector of offsets.
820 * The offsets will be used to index into the constant buffer or
821 * temporary register file.
822 */
823 static LLVMValueRef
824 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
825 unsigned reg_file, unsigned reg_index,
826 const struct tgsi_ind_register *indirect_reg)
827 {
828 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
829 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
830 /* always use X component of address register */
831 unsigned swizzle = indirect_reg->Swizzle;
832 LLVMValueRef base;
833 LLVMValueRef rel;
834 LLVMValueRef max_index;
835 LLVMValueRef index;
836
837 assert(bld->indirect_files & (1 << reg_file));
838
839 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
840
841 assert(swizzle < 4);
842 switch (indirect_reg->File) {
843 case TGSI_FILE_ADDRESS:
844 rel = LLVMBuildLoad(builder,
845 bld->addr[indirect_reg->Index][swizzle],
846 "load addr reg");
847 /* ADDR LLVM values already have LLVM integer type. */
848 break;
849 case TGSI_FILE_TEMPORARY:
850 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
851 rel = LLVMBuildLoad(builder, rel, "load temp reg");
852 /* TEMP LLVM values always have LLVM float type, but for indirection, the
853 * value actually stored is expected to be an integer */
854 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
855 break;
856 default:
857 assert(0);
858 rel = uint_bld->zero;
859 }
860
861 index = lp_build_add(uint_bld, base, rel);
862
863 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
864 uint_bld->type,
865 bld->bld_base.info->file_max[reg_file]);
866
867 assert(!uint_bld->type.sign);
868 index = lp_build_min(uint_bld, index, max_index);
869
870 return index;
871 }
872
873 static struct lp_build_context *
874 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
875 enum tgsi_opcode_type stype)
876 {
877 struct lp_build_context *bld_fetch;
878
879 switch (stype) {
880 case TGSI_TYPE_FLOAT:
881 case TGSI_TYPE_UNTYPED:
882 bld_fetch = &bld_base->base;
883 break;
884 case TGSI_TYPE_UNSIGNED:
885 bld_fetch = &bld_base->uint_bld;
886 break;
887 case TGSI_TYPE_SIGNED:
888 bld_fetch = &bld_base->int_bld;
889 break;
890 case TGSI_TYPE_VOID:
891 case TGSI_TYPE_DOUBLE:
892 default:
893 assert(0);
894 bld_fetch = NULL;
895 break;
896 }
897 return bld_fetch;
898 }
899
900 static LLVMValueRef
901 emit_fetch_constant(
902 struct lp_build_tgsi_context * bld_base,
903 const struct tgsi_full_src_register * reg,
904 enum tgsi_opcode_type stype,
905 unsigned swizzle)
906 {
907 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
908 struct gallivm_state *gallivm = bld_base->base.gallivm;
909 LLVMBuilderRef builder = gallivm->builder;
910 struct lp_build_context *uint_bld = &bld_base->uint_bld;
911 LLVMValueRef indirect_index = NULL;
912 unsigned dimension = 0;
913 LLVMValueRef dimension_index;
914 LLVMValueRef consts_ptr;
915 LLVMValueRef res;
916
917 /* XXX: Handle fetching xyzw components as a vector */
918 assert(swizzle != ~0);
919
920 if (reg->Register.Dimension) {
921 assert(!reg->Dimension.Indirect);
922 dimension = reg->Dimension.Index;
923 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
924 }
925
926 dimension_index = lp_build_const_int32(gallivm, dimension);
927 consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);
928
929 if (reg->Register.Indirect) {
930 indirect_index = get_indirect_index(bld,
931 reg->Register.File,
932 reg->Register.Index,
933 &reg->Indirect);
934 }
935
936 if (reg->Register.Indirect) {
937 LLVMValueRef swizzle_vec =
938 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
939 LLVMValueRef index_vec; /* index into the const buffer */
940
941 /* index_vec = indirect_index * 4 + swizzle */
942 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
943 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
944
945 /* Gather values from the constant buffer */
946 res = build_gather(&bld_base->base, consts_ptr, index_vec);
947 }
948 else {
949 LLVMValueRef index; /* index into the const buffer */
950 LLVMValueRef scalar, scalar_ptr;
951
952 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
953
954 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
955 &index, 1, "");
956 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
957 res = lp_build_broadcast_scalar(&bld_base->base, scalar);
958 }
959
960 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
961 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
962 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
963 }
964 return res;
965 }
966
967 static LLVMValueRef
968 emit_fetch_immediate(
969 struct lp_build_tgsi_context * bld_base,
970 const struct tgsi_full_src_register * reg,
971 enum tgsi_opcode_type stype,
972 unsigned swizzle)
973 {
974 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
975 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
976 LLVMBuilderRef builder = gallivm->builder;
977 struct lp_build_context *uint_bld = &bld_base->uint_bld;
978 struct lp_build_context *float_bld = &bld_base->base;
979 LLVMValueRef res = NULL;
980 LLVMValueRef indirect_index = NULL;
981
982 if (reg->Register.Indirect) {
983 indirect_index = get_indirect_index(bld,
984 reg->Register.File,
985 reg->Register.Index,
986 &reg->Indirect);
987 }
988
989 if (reg->Register.Indirect) {
990 LLVMValueRef swizzle_vec =
991 lp_build_const_int_vec(bld->bld_base.base.gallivm,
992 uint_bld->type, swizzle);
993 LLVMValueRef length_vec =
994 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
995 bld->bld_base.base.type.length);
996 LLVMValueRef index_vec; /* index into the const buffer */
997 LLVMValueRef imms_array;
998 LLVMValueRef pixel_offsets;
999 LLVMValueRef offsets[LP_MAX_VECTOR_LENGTH];
1000 LLVMTypeRef float4_ptr_type;
1001 int i;
1002
1003 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1004 for (i = 0; i < float_bld->type.length; i++) {
1005 offsets[i] = lp_build_const_int32(gallivm, i);
1006 }
1007 pixel_offsets = LLVMConstVector(offsets, float_bld->type.length);
1008
1009 /* index_vec = (indirect_index * 4 + swizzle) * length */
1010 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1011 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1012 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1013 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1014
1015 /* cast imms_array pointer to float* */
1016 float4_ptr_type = LLVMPointerType(
1017 LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
1018 imms_array = LLVMBuildBitCast(builder, bld->imms_array,
1019 float4_ptr_type, "");
1020
1021 /* Gather values from the temporary register array */
1022 res = build_gather(&bld_base->base, imms_array, index_vec);
1023 }
1024 else {
1025 res = bld->immediates[reg->Register.Index][swizzle];
1026 }
1027
1028 if (stype == TGSI_TYPE_UNSIGNED) {
1029 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1030 } else if (stype == TGSI_TYPE_SIGNED) {
1031 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1032 }
1033 return res;
1034 }
1035
1036 static LLVMValueRef
1037 emit_fetch_input(
1038 struct lp_build_tgsi_context * bld_base,
1039 const struct tgsi_full_src_register * reg,
1040 enum tgsi_opcode_type stype,
1041 unsigned swizzle)
1042 {
1043 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1044 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1045 LLVMBuilderRef builder = gallivm->builder;
1046 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1047 LLVMValueRef indirect_index = NULL;
1048 LLVMValueRef res;
1049
1050 if (reg->Register.Indirect) {
1051 indirect_index = get_indirect_index(bld,
1052 reg->Register.File,
1053 reg->Register.Index,
1054 &reg->Indirect);
1055 }
1056
1057 if (reg->Register.Indirect) {
1058 LLVMValueRef swizzle_vec =
1059 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1060 LLVMValueRef length_vec =
1061 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
1062 LLVMValueRef index_vec; /* index into the const buffer */
1063 LLVMValueRef inputs_array;
1064 LLVMTypeRef float4_ptr_type;
1065
1066 /* index_vec = (indirect_index * 4 + swizzle) * length */
1067 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1068 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1069 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1070
1071 /* cast inputs_array pointer to float* */
1072 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1073 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
1074 float4_ptr_type, "");
1075
1076 /* Gather values from the temporary register array */
1077 res = build_gather(&bld_base->base, inputs_array, index_vec);
1078 } else {
1079 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1080 LLVMValueRef lindex = lp_build_const_int32(gallivm,
1081 reg->Register.Index * 4 + swizzle);
1082 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1083 bld->inputs_array, &lindex, 1, "");
1084 res = LLVMBuildLoad(builder, input_ptr, "");
1085 }
1086 else {
1087 res = bld->inputs[reg->Register.Index][swizzle];
1088 }
1089 }
1090
1091 assert(res);
1092
1093 if (stype == TGSI_TYPE_UNSIGNED) {
1094 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1095 } else if (stype == TGSI_TYPE_SIGNED) {
1096 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1097 }
1098
1099 return res;
1100 }
1101
1102
1103 static LLVMValueRef
1104 emit_fetch_gs_input(
1105 struct lp_build_tgsi_context * bld_base,
1106 const struct tgsi_full_src_register * reg,
1107 enum tgsi_opcode_type stype,
1108 unsigned swizzle)
1109 {
1110 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1111 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1112 LLVMBuilderRef builder = gallivm->builder;
1113 LLVMValueRef attrib_index = NULL;
1114 LLVMValueRef vertex_index = NULL;
1115 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1116 LLVMValueRef res;
1117
1118 if (reg->Register.Indirect) {
1119 attrib_index = get_indirect_index(bld,
1120 reg->Register.File,
1121 reg->Register.Index,
1122 &reg->Indirect);
1123 } else {
1124 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1125 }
1126
1127 if (reg->Dimension.Indirect) {
1128 vertex_index = get_indirect_index(bld,
1129 reg->Register.File,
1130 reg->Dimension.Index,
1131 &reg->DimIndirect);
1132 } else {
1133 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1134 }
1135
1136 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1137 reg->Dimension.Indirect,
1138 vertex_index, attrib_index,
1139 swizzle_index);
1140
1141 assert(res);
1142
1143 if (stype == TGSI_TYPE_UNSIGNED) {
1144 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1145 } else if (stype == TGSI_TYPE_SIGNED) {
1146 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1147 }
1148
1149 return res;
1150 }
1151
1152 static LLVMValueRef
1153 emit_fetch_temporary(
1154 struct lp_build_tgsi_context * bld_base,
1155 const struct tgsi_full_src_register * reg,
1156 enum tgsi_opcode_type stype,
1157 unsigned swizzle)
1158 {
1159 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1160 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1161 LLVMBuilderRef builder = gallivm->builder;
1162 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1163 struct lp_build_context *float_bld = &bld_base->base;
1164 LLVMValueRef indirect_index = NULL;
1165 LLVMValueRef res;
1166
1167 if (reg->Register.Indirect) {
1168 indirect_index = get_indirect_index(bld,
1169 reg->Register.File,
1170 reg->Register.Index,
1171 &reg->Indirect);
1172 }
1173
1174 if (reg->Register.Indirect) {
1175 LLVMValueRef swizzle_vec =
1176 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
1177 LLVMValueRef length_vec =
1178 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
1179 bld->bld_base.base.type.length);
1180 LLVMValueRef index_vec; /* index into the const buffer */
1181 LLVMValueRef temps_array;
1182 LLVMValueRef pixel_offsets;
1183 LLVMValueRef offsets[LP_MAX_VECTOR_LENGTH];
1184 LLVMTypeRef float4_ptr_type;
1185 int i;
1186
1187 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1188 for (i = 0; i < float_bld->type.length; i++) {
1189 offsets[i] = lp_build_const_int32(gallivm, i);
1190 }
1191 pixel_offsets = LLVMConstVector(offsets, float_bld->type.length);
1192
1193 /* index_vec = (indirect_index * 4 + swizzle) * length */
1194 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1195 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1196 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1197 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1198
1199 /* cast temps_array pointer to float* */
1200 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
1201 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1202 float4_ptr_type, "");
1203
1204 /* Gather values from the temporary register array */
1205 res = build_gather(&bld_base->base, temps_array, index_vec);
1206 }
1207 else {
1208 LLVMValueRef temp_ptr;
1209 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1210 res = LLVMBuildLoad(builder, temp_ptr, "");
1211 }
1212
1213 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
1214 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1215 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1216 }
1217
1218 return res;
1219 }
1220
1221 static LLVMValueRef
1222 emit_fetch_system_value(
1223 struct lp_build_tgsi_context * bld_base,
1224 const struct tgsi_full_src_register * reg,
1225 enum tgsi_opcode_type stype,
1226 unsigned swizzle)
1227 {
1228 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1229 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1230 const struct tgsi_shader_info *info = bld->bld_base.info;
1231 LLVMBuilderRef builder = gallivm->builder;
1232 LLVMValueRef res;
1233 enum tgsi_opcode_type atype; // Actual type of the value
1234
1235 assert(!reg->Register.Indirect);
1236
1237 switch (info->system_value_semantic_name[reg->Register.Index]) {
1238 case TGSI_SEMANTIC_INSTANCEID:
1239 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1240 atype = TGSI_TYPE_UNSIGNED;
1241 break;
1242
1243 case TGSI_SEMANTIC_VERTEXID:
1244 res = bld->system_values.vertex_id;
1245 atype = TGSI_TYPE_UNSIGNED;
1246 break;
1247
1248 case TGSI_SEMANTIC_PRIMID:
1249 res = bld->system_values.prim_id;
1250 atype = TGSI_TYPE_UNSIGNED;
1251 break;
1252
1253 default:
1254 assert(!"unexpected semantic in emit_fetch_system_value");
1255 res = bld_base->base.zero;
1256 atype = TGSI_TYPE_FLOAT;
1257 break;
1258 }
1259
1260 if (atype != stype) {
1261 if (stype == TGSI_TYPE_FLOAT) {
1262 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1263 } else if (stype == TGSI_TYPE_UNSIGNED) {
1264 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1265 } else if (stype == TGSI_TYPE_SIGNED) {
1266 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1267 }
1268 }
1269
1270 return res;
1271 }
1272
1273 /**
1274 * Register fetch with derivatives.
1275 */
1276 static void
1277 emit_fetch_deriv(
1278 struct lp_build_tgsi_soa_context *bld,
1279 LLVMValueRef src,
1280 LLVMValueRef *res,
1281 LLVMValueRef *ddx,
1282 LLVMValueRef *ddy)
1283 {
1284 if(res)
1285 *res = src;
1286
1287 /* TODO: use interpolation coeffs for inputs */
1288
1289 if(ddx)
1290 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1291
1292 if(ddy)
1293 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1294 }
1295
1296
1297 /**
1298 * Predicate.
1299 */
1300 static void
1301 emit_fetch_predicate(
1302 struct lp_build_tgsi_soa_context *bld,
1303 const struct tgsi_full_instruction *inst,
1304 LLVMValueRef *pred)
1305 {
1306 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1307 unsigned index;
1308 unsigned char swizzles[4];
1309 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
1310 LLVMValueRef value;
1311 unsigned chan;
1312
1313 if (!inst->Instruction.Predicate) {
1314 TGSI_FOR_EACH_CHANNEL( chan ) {
1315 pred[chan] = NULL;
1316 }
1317 return;
1318 }
1319
1320 swizzles[0] = inst->Predicate.SwizzleX;
1321 swizzles[1] = inst->Predicate.SwizzleY;
1322 swizzles[2] = inst->Predicate.SwizzleZ;
1323 swizzles[3] = inst->Predicate.SwizzleW;
1324
1325 index = inst->Predicate.Index;
1326 assert(index < LP_MAX_TGSI_PREDS);
1327
1328 TGSI_FOR_EACH_CHANNEL( chan ) {
1329 unsigned swizzle = swizzles[chan];
1330
1331 /*
1332 * Only fetch the predicate register channels that are actually listed
1333 * in the swizzles
1334 */
1335 if (!unswizzled[swizzle]) {
1336 value = LLVMBuildLoad(builder,
1337 bld->preds[index][swizzle], "");
1338
1339 /*
1340 * Convert the value to an integer mask.
1341 *
1342 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
1343 * is needlessly causing two comparisons due to storing the intermediate
1344 * result as float vector instead of an integer mask vector.
1345 */
1346 value = lp_build_compare(bld->bld_base.base.gallivm,
1347 bld->bld_base.base.type,
1348 PIPE_FUNC_NOTEQUAL,
1349 value,
1350 bld->bld_base.base.zero);
1351 if (inst->Predicate.Negate) {
1352 value = LLVMBuildNot(builder, value, "");
1353 }
1354
1355 unswizzled[swizzle] = value;
1356 } else {
1357 value = unswizzled[swizzle];
1358 }
1359
1360 pred[chan] = value;
1361 }
1362 }
1363
1364 /**
1365 * Register store.
1366 */
1367 static void
1368 emit_store_chan(
1369 struct lp_build_tgsi_context *bld_base,
1370 const struct tgsi_full_instruction *inst,
1371 unsigned index,
1372 unsigned chan_index,
1373 LLVMValueRef pred,
1374 LLVMValueRef value)
1375 {
1376 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1377 struct gallivm_state *gallivm = bld_base->base.gallivm;
1378 LLVMBuilderRef builder = gallivm->builder;
1379 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1380 struct lp_build_context *float_bld = &bld_base->base;
1381 struct lp_build_context *int_bld = &bld_base->int_bld;
1382 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1383 LLVMValueRef indirect_index = NULL;
1384 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1385
1386 /*
1387 * Apply saturation.
1388 *
1389 * It is always assumed to be float.
1390 */
1391 switch( inst->Instruction.Saturate ) {
1392 case TGSI_SAT_NONE:
1393 break;
1394
1395 case TGSI_SAT_ZERO_ONE:
1396 assert(dtype == TGSI_TYPE_FLOAT ||
1397 dtype == TGSI_TYPE_UNTYPED);
1398 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1399 value = lp_build_max_ext(float_bld, value, float_bld->zero,
1400 GALLIVM_NAN_RETURN_SECOND);
1401 value = lp_build_min_ext(float_bld, value, float_bld->one,
1402 GALLIVM_NAN_BEHAVIOR_UNDEFINED);
1403 break;
1404
1405 case TGSI_SAT_MINUS_PLUS_ONE:
1406 assert(dtype == TGSI_TYPE_FLOAT ||
1407 dtype == TGSI_TYPE_UNTYPED);
1408 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1409 value = lp_build_max_ext(float_bld, value,
1410 lp_build_const_vec(gallivm, float_bld->type, -1.0),
1411 GALLIVM_NAN_RETURN_SECOND);
1412 value = lp_build_min_ext(float_bld, value, float_bld->one,
1413 GALLIVM_NAN_BEHAVIOR_UNDEFINED);
1414 break;
1415
1416 default:
1417 assert(0);
1418 }
1419
1420 if (reg->Register.Indirect) {
1421 indirect_index = get_indirect_index(bld,
1422 reg->Register.File,
1423 reg->Register.Index,
1424 &reg->Indirect);
1425 } else {
1426 assert(reg->Register.Index <=
1427 bld_base->info->file_max[reg->Register.File]);
1428 }
1429
1430 switch( reg->Register.File ) {
1431 case TGSI_FILE_OUTPUT:
1432 /* Outputs are always stored as floats */
1433 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1434
1435 if (reg->Register.Indirect) {
1436 LLVMValueRef chan_vec =
1437 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1438 LLVMValueRef length_vec =
1439 lp_build_const_int_vec(gallivm, uint_bld->type, float_bld->type.length);
1440 LLVMValueRef index_vec; /* indexes into the temp registers */
1441 LLVMValueRef outputs_array;
1442 LLVMValueRef pixel_offsets;
1443 LLVMTypeRef float_ptr_type;
1444 int i;
1445
1446 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1447 pixel_offsets = uint_bld->undef;
1448 for (i = 0; i < float_bld->type.length; i++) {
1449 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1450 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1451 ii, ii, "");
1452 }
1453
1454 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1455 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1456 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1457 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1458 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1459
1460 float_ptr_type =
1461 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1462 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
1463 float_ptr_type, "");
1464
1465 /* Scatter store values into temp registers */
1466 emit_mask_scatter(bld, outputs_array, index_vec, value,
1467 &bld->exec_mask, pred);
1468 }
1469 else {
1470 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1471 chan_index);
1472 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
1473 }
1474 break;
1475
1476 case TGSI_FILE_TEMPORARY:
1477 /* Temporaries are always stored as floats */
1478 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1479
1480 if (reg->Register.Indirect) {
1481 LLVMValueRef chan_vec =
1482 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1483 LLVMValueRef length_vec =
1484 lp_build_const_int_vec(gallivm, uint_bld->type,
1485 float_bld->type.length);
1486 LLVMValueRef index_vec; /* indexes into the temp registers */
1487 LLVMValueRef temps_array;
1488 LLVMValueRef pixel_offsets;
1489 LLVMTypeRef float_ptr_type;
1490 int i;
1491
1492 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1493 pixel_offsets = uint_bld->undef;
1494 for (i = 0; i < float_bld->type.length; i++) {
1495 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1496 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1497 ii, ii, "");
1498 }
1499
1500 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1501 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1502 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1503 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1504 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1505
1506 float_ptr_type =
1507 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1508 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1509 float_ptr_type, "");
1510
1511 /* Scatter store values into temp registers */
1512 emit_mask_scatter(bld, temps_array, index_vec, value,
1513 &bld->exec_mask, pred);
1514 }
1515 else {
1516 LLVMValueRef temp_ptr;
1517 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1518 chan_index);
1519 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
1520 }
1521 break;
1522
1523 case TGSI_FILE_ADDRESS:
1524 assert(dtype == TGSI_TYPE_SIGNED);
1525 assert(LLVMTypeOf(value) == int_bld->vec_type);
1526 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1527 lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
1528 bld->addr[reg->Register.Index][chan_index]);
1529 break;
1530
1531 case TGSI_FILE_PREDICATE:
1532 assert(LLVMTypeOf(value) == float_bld->vec_type);
1533 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1534 lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
1535 bld->preds[reg->Register.Index][chan_index]);
1536 break;
1537
1538 default:
1539 assert( 0 );
1540 }
1541
1542 (void)dtype;
1543 }
1544
1545 static void
1546 emit_store(
1547 struct lp_build_tgsi_context * bld_base,
1548 const struct tgsi_full_instruction * inst,
1549 const struct tgsi_opcode_info * info,
1550 LLVMValueRef dst[4])
1551
1552 {
1553 unsigned chan_index;
1554 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1555
1556 if(info->num_dst) {
1557 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1558
1559 emit_fetch_predicate( bld, inst, pred );
1560
1561 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1562 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1563 }
1564 }
1565 }
1566
1567 static unsigned
1568 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1569 {
1570 switch (tgsi_target) {
1571 case TGSI_TEXTURE_BUFFER:
1572 return PIPE_BUFFER;
1573 case TGSI_TEXTURE_1D:
1574 case TGSI_TEXTURE_SHADOW1D:
1575 return PIPE_TEXTURE_1D;
1576 case TGSI_TEXTURE_2D:
1577 case TGSI_TEXTURE_SHADOW2D:
1578 case TGSI_TEXTURE_2D_MSAA:
1579 return PIPE_TEXTURE_2D;
1580 case TGSI_TEXTURE_3D:
1581 return PIPE_TEXTURE_3D;
1582 case TGSI_TEXTURE_CUBE:
1583 case TGSI_TEXTURE_SHADOWCUBE:
1584 return PIPE_TEXTURE_CUBE;
1585 case TGSI_TEXTURE_RECT:
1586 case TGSI_TEXTURE_SHADOWRECT:
1587 return PIPE_TEXTURE_RECT;
1588 case TGSI_TEXTURE_1D_ARRAY:
1589 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1590 return PIPE_TEXTURE_1D_ARRAY;
1591 case TGSI_TEXTURE_2D_ARRAY:
1592 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1593 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1594 return PIPE_TEXTURE_2D_ARRAY;
1595 case TGSI_TEXTURE_CUBE_ARRAY:
1596 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1597 return PIPE_TEXTURE_CUBE_ARRAY;
1598 default:
1599 assert(0);
1600 return PIPE_BUFFER;
1601 }
1602 }
1603
1604 /**
1605 * High-level instruction translators.
1606 */
1607
1608 static void
1609 emit_tex( struct lp_build_tgsi_soa_context *bld,
1610 const struct tgsi_full_instruction *inst,
1611 enum lp_build_tex_modifier modifier,
1612 LLVMValueRef *texel)
1613 {
1614 unsigned unit;
1615 LLVMValueRef lod_bias, explicit_lod;
1616 LLVMValueRef oow = NULL;
1617 LLVMValueRef coords[4];
1618 LLVMValueRef offsets[3] = { NULL };
1619 struct lp_derivatives derivs;
1620 struct lp_derivatives *deriv_ptr = NULL;
1621 boolean scalar_lod;
1622 unsigned num_coords, num_derivs, num_offsets;
1623 unsigned i;
1624
1625 if (!bld->sampler) {
1626 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1627 for (i = 0; i < 4; i++) {
1628 texel[i] = bld->bld_base.base.undef;
1629 }
1630 return;
1631 }
1632
1633 switch (inst->Texture.Texture) {
1634 case TGSI_TEXTURE_1D:
1635 num_coords = 1;
1636 num_offsets = 1;
1637 num_derivs = 1;
1638 break;
1639 case TGSI_TEXTURE_1D_ARRAY:
1640 num_coords = 2;
1641 num_offsets = 1;
1642 num_derivs = 1;
1643 break;
1644 case TGSI_TEXTURE_2D:
1645 case TGSI_TEXTURE_RECT:
1646 num_coords = 2;
1647 num_offsets = 2;
1648 num_derivs = 2;
1649 break;
1650 case TGSI_TEXTURE_SHADOW1D:
1651 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1652 num_coords = 3;
1653 num_offsets = 1;
1654 num_derivs = 1;
1655 break;
1656 case TGSI_TEXTURE_SHADOW2D:
1657 case TGSI_TEXTURE_SHADOWRECT:
1658 case TGSI_TEXTURE_2D_ARRAY:
1659 num_coords = 3;
1660 num_offsets = 2;
1661 num_derivs = 2;
1662 break;
1663 case TGSI_TEXTURE_CUBE:
1664 num_coords = 3;
1665 num_offsets = 2;
1666 num_derivs = 3;
1667 break;
1668 case TGSI_TEXTURE_3D:
1669 num_coords = 3;
1670 num_offsets = 3;
1671 num_derivs = 3;
1672 break;
1673 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1674 num_coords = 4;
1675 num_offsets = 2;
1676 num_derivs = 2;
1677 break;
1678 case TGSI_TEXTURE_SHADOWCUBE:
1679 num_coords = 4;
1680 num_offsets = 2;
1681 num_derivs = 3;
1682 break;
1683 default:
1684 assert(0);
1685 return;
1686 }
1687
1688 /* Note lod and especially projected are illegal in a LOT of cases */
1689 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1690 assert(num_coords < 4);
1691 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1692 explicit_lod = NULL;
1693 }
1694 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1695 assert(num_coords < 4);
1696 lod_bias = NULL;
1697 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1698 }
1699 else {
1700 lod_bias = NULL;
1701 explicit_lod = NULL;
1702 }
1703
1704 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1705 assert(num_coords < 4);
1706 oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1707 oow = lp_build_rcp(&bld->bld_base.base, oow);
1708 }
1709
1710 for (i = 0; i < num_coords; i++) {
1711 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1712 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1713 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1714 }
1715 for (i = num_coords; i < 4; i++) {
1716 coords[i] = bld->bld_base.base.undef;
1717 }
1718
1719 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1720 unsigned dim;
1721 for (dim = 0; dim < num_derivs; ++dim) {
1722 derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
1723 derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
1724 }
1725 deriv_ptr = &derivs;
1726 unit = inst->Src[3].Register.Index;
1727 } else {
1728 unit = inst->Src[1].Register.Index;
1729 }
1730
1731 /* some advanced gather instructions (txgo) would require 4 offsets */
1732 if (inst->Texture.NumOffsets == 1) {
1733 unsigned dim;
1734 for (dim = 0; dim < num_offsets; dim++) {
1735 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1736 }
1737 }
1738
1739 /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
1740 scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
1741
1742 bld->sampler->emit_fetch_texel(bld->sampler,
1743 bld->bld_base.base.gallivm,
1744 bld->bld_base.base.type,
1745 FALSE,
1746 unit, unit,
1747 coords,
1748 offsets,
1749 deriv_ptr,
1750 lod_bias, explicit_lod, scalar_lod,
1751 texel);
1752 }
1753
1754 static void
1755 emit_sample(struct lp_build_tgsi_soa_context *bld,
1756 const struct tgsi_full_instruction *inst,
1757 enum lp_build_tex_modifier modifier,
1758 boolean compare,
1759 LLVMValueRef *texel)
1760 {
1761 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1762 unsigned texture_unit, sampler_unit;
1763 LLVMValueRef lod_bias, explicit_lod;
1764 LLVMValueRef coords[4];
1765 LLVMValueRef offsets[3] = { NULL };
1766 struct lp_derivatives derivs;
1767 struct lp_derivatives *deriv_ptr = NULL;
1768 boolean scalar_lod;
1769 unsigned num_coords, num_offsets, num_derivs;
1770 unsigned i;
1771
1772 if (!bld->sampler) {
1773 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1774 for (i = 0; i < 4; i++) {
1775 texel[i] = bld->bld_base.base.undef;
1776 }
1777 return;
1778 }
1779
1780 /*
1781 * unlike old-style tex opcodes the texture/sampler indices
1782 * always come from src1 and src2 respectively.
1783 */
1784 texture_unit = inst->Src[1].Register.Index;
1785 sampler_unit = inst->Src[2].Register.Index;
1786
1787 /*
1788 * Note inst->Texture.Texture will contain the number of offsets,
1789 * however the target information is NOT there and comes from the
1790 * declared sampler views instead.
1791 */
1792 switch (bld->sv[texture_unit].Resource) {
1793 case TGSI_TEXTURE_1D:
1794 num_coords = 1;
1795 num_offsets = 1;
1796 num_derivs = 1;
1797 break;
1798 case TGSI_TEXTURE_1D_ARRAY:
1799 num_coords = 2;
1800 num_offsets = 1;
1801 num_derivs = 1;
1802 break;
1803 case TGSI_TEXTURE_2D:
1804 case TGSI_TEXTURE_RECT:
1805 num_coords = 2;
1806 num_offsets = 2;
1807 num_derivs = 2;
1808 break;
1809 case TGSI_TEXTURE_2D_ARRAY:
1810 num_coords = 3;
1811 num_offsets = 2;
1812 num_derivs = 2;
1813 break;
1814 case TGSI_TEXTURE_CUBE:
1815 num_coords = 3;
1816 num_offsets = 2;
1817 num_derivs = 3;
1818 break;
1819 case TGSI_TEXTURE_3D:
1820 num_coords = 3;
1821 num_offsets = 3;
1822 num_derivs = 3;
1823 break;
1824 case TGSI_TEXTURE_CUBE_ARRAY:
1825 num_coords = 4;
1826 num_offsets = 2;
1827 num_derivs = 3;
1828 break;
1829 default:
1830 assert(0);
1831 return;
1832 }
1833
1834 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1835 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1836 explicit_lod = NULL;
1837 }
1838 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1839 lod_bias = NULL;
1840 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1841 }
1842 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
1843 lod_bias = NULL;
1844 /* XXX might be better to explicitly pass the level zero information */
1845 explicit_lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
1846 }
1847 else {
1848 lod_bias = NULL;
1849 explicit_lod = NULL;
1850 }
1851
1852 for (i = 0; i < num_coords; i++) {
1853 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1854 }
1855 for (i = num_coords; i < 4; i++) {
1856 coords[i] = bld->bld_base.base.undef;
1857 }
1858 /*
1859 * XXX: whack shadow comparison value into place.
1860 * Should probably fix the interface for separate value
1861 * (it will not work for cube arrays if it is part of coords).
1862 */
1863 if (compare) {
1864 unsigned c_coord = num_coords > 2 ? 3 : 2;
1865 assert(num_coords < 4);
1866 coords[c_coord] = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1867 }
1868
1869 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1870 unsigned dim;
1871 for (dim = 0; dim < num_derivs; ++dim) {
1872 derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
1873 derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
1874 }
1875 deriv_ptr = &derivs;
1876 }
1877
1878 /* some advanced gather instructions (txgo) would require 4 offsets */
1879 if (inst->Texture.NumOffsets == 1) {
1880 unsigned dim;
1881 for (dim = 0; dim < num_offsets; dim++) {
1882 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1883 }
1884 }
1885
1886 /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
1887 scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
1888
1889 bld->sampler->emit_fetch_texel(bld->sampler,
1890 bld->bld_base.base.gallivm,
1891 bld->bld_base.base.type,
1892 FALSE,
1893 texture_unit, sampler_unit,
1894 coords,
1895 offsets,
1896 deriv_ptr,
1897 lod_bias, explicit_lod, scalar_lod,
1898 texel);
1899
1900 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
1901 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
1902 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
1903 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA) {
1904 unsigned char swizzles[4];
1905 swizzles[0] = inst->Src[1].Register.SwizzleX;
1906 swizzles[1] = inst->Src[1].Register.SwizzleY;
1907 swizzles[2] = inst->Src[1].Register.SwizzleZ;
1908 swizzles[3] = inst->Src[1].Register.SwizzleW;
1909
1910 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
1911 }
1912 }
1913
1914 static void
1915 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
1916 const struct tgsi_full_instruction *inst,
1917 LLVMValueRef *texel,
1918 boolean is_samplei)
1919 {
1920 unsigned unit, target;
1921 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
1922 LLVMValueRef explicit_lod = NULL;
1923 LLVMValueRef coords[3];
1924 LLVMValueRef offsets[3] = { NULL };
1925 boolean scalar_lod;
1926 unsigned num_coords;
1927 unsigned dims;
1928 unsigned i;
1929
1930 if (!bld->sampler) {
1931 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1932 for (i = 0; i < 4; i++) {
1933 texel[i] = coord_undef;
1934 }
1935 return;
1936 }
1937
1938 unit = inst->Src[1].Register.Index;
1939
1940 if (is_samplei) {
1941 target = bld->sv[unit].Resource;
1942 }
1943 else {
1944 target = inst->Texture.Texture;
1945 }
1946
1947 switch (target) {
1948 case TGSI_TEXTURE_1D:
1949 case TGSI_TEXTURE_BUFFER:
1950 num_coords = 1;
1951 dims = 1;
1952 break;
1953 case TGSI_TEXTURE_1D_ARRAY:
1954 num_coords = 2;
1955 dims = 1;
1956 break;
1957 case TGSI_TEXTURE_2D:
1958 case TGSI_TEXTURE_RECT:
1959 num_coords = 2;
1960 dims = 2;
1961 break;
1962 case TGSI_TEXTURE_2D_ARRAY:
1963 num_coords = 3;
1964 dims = 2;
1965 break;
1966 case TGSI_TEXTURE_3D:
1967 num_coords = 3;
1968 dims = 3;
1969 break;
1970 default:
1971 assert(0);
1972 return;
1973 }
1974
1975 /* always have lod except for buffers ? */
1976 if (target != TGSI_TEXTURE_BUFFER) {
1977 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1978 }
1979
1980 for (i = 0; i < num_coords; i++) {
1981 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1982 }
1983 for (i = num_coords; i < 3; i++) {
1984 coords[i] = coord_undef;
1985 }
1986
1987 if (inst->Texture.NumOffsets == 1) {
1988 unsigned dim;
1989 for (dim = 0; dim < dims; dim++) {
1990 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1991 }
1992 }
1993
1994 /* TODO: use scalar lod if explicit_lod is broadcasted scalar */
1995 scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
1996
1997 bld->sampler->emit_fetch_texel(bld->sampler,
1998 bld->bld_base.base.gallivm,
1999 bld->bld_base.base.type,
2000 TRUE,
2001 unit, unit,
2002 coords,
2003 offsets,
2004 NULL,
2005 NULL, explicit_lod, scalar_lod,
2006 texel);
2007
2008 if (is_samplei &&
2009 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
2010 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
2011 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
2012 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA)) {
2013 unsigned char swizzles[4];
2014 swizzles[0] = inst->Src[1].Register.SwizzleX;
2015 swizzles[1] = inst->Src[1].Register.SwizzleY;
2016 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2017 swizzles[3] = inst->Src[1].Register.SwizzleW;
2018
2019 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2020 }
2021 }
2022
2023 static void
2024 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2025 const struct tgsi_full_instruction *inst,
2026 LLVMValueRef *sizes_out,
2027 boolean is_sviewinfo)
2028 {
2029 LLVMValueRef explicit_lod;
2030 boolean scalar_lod;
2031 unsigned has_lod;
2032 unsigned i;
2033 unsigned unit = inst->Src[1].Register.Index;
2034 unsigned target, pipe_target;
2035
2036 if (is_sviewinfo) {
2037 target = bld->sv[unit].Resource;
2038 }
2039 else {
2040 target = inst->Texture.Texture;
2041 }
2042 switch (target) {
2043 case TGSI_TEXTURE_BUFFER:
2044 case TGSI_TEXTURE_RECT:
2045 case TGSI_TEXTURE_SHADOWRECT:
2046 has_lod = 0;
2047 break;
2048 default:
2049 has_lod = 1;
2050 break;
2051 }
2052
2053 if (!bld->sampler) {
2054 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2055 for (i = 0; i < 4; i++)
2056 sizes_out[i] = bld->bld_base.int_bld.undef;
2057 return;
2058 }
2059
2060 if (has_lod)
2061 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 0 );
2062 else
2063 explicit_lod = NULL;
2064
2065 pipe_target = tgsi_to_pipe_tex_target(target);
2066
2067 /* TODO: use scalar lod if explicit_lod is broadcasted scalar */
2068 scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
2069
2070 bld->sampler->emit_size_query(bld->sampler,
2071 bld->bld_base.base.gallivm,
2072 bld->bld_base.int_bld.type,
2073 unit, pipe_target,
2074 is_sviewinfo,
2075 scalar_lod,
2076 explicit_lod,
2077 sizes_out);
2078 }
2079
2080 static boolean
2081 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2082 int pc)
2083 {
2084 int i;
2085
2086 for (i = 0; i < 5; i++) {
2087 unsigned opcode;
2088
2089 if (pc + i >= bld->bld_base.info->num_instructions)
2090 return TRUE;
2091
2092 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2093
2094 if (opcode == TGSI_OPCODE_END)
2095 return TRUE;
2096
2097 if (opcode == TGSI_OPCODE_TEX ||
2098 opcode == TGSI_OPCODE_TXP ||
2099 opcode == TGSI_OPCODE_TXD ||
2100 opcode == TGSI_OPCODE_TXB ||
2101 opcode == TGSI_OPCODE_TXL ||
2102 opcode == TGSI_OPCODE_TXF ||
2103 opcode == TGSI_OPCODE_TXQ ||
2104 opcode == TGSI_OPCODE_CAL ||
2105 opcode == TGSI_OPCODE_CALLNZ ||
2106 opcode == TGSI_OPCODE_IF ||
2107 opcode == TGSI_OPCODE_UIF ||
2108 opcode == TGSI_OPCODE_BGNLOOP ||
2109 opcode == TGSI_OPCODE_SWITCH)
2110 return FALSE;
2111 }
2112
2113 return TRUE;
2114 }
2115
2116
2117
2118 /**
2119 * Kill fragment if any of the src register values are negative.
2120 */
2121 static void
2122 emit_kill_if(
2123 struct lp_build_tgsi_soa_context *bld,
2124 const struct tgsi_full_instruction *inst,
2125 int pc)
2126 {
2127 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2128 const struct tgsi_full_src_register *reg = &inst->Src[0];
2129 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2130 LLVMValueRef mask;
2131 unsigned chan_index;
2132
2133 memset(&terms, 0, sizeof terms);
2134
2135 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2136 unsigned swizzle;
2137
2138 /* Unswizzle channel */
2139 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2140
2141 /* Check if the component has not been already tested. */
2142 assert(swizzle < TGSI_NUM_CHANNELS);
2143 if( !terms[swizzle] )
2144 /* TODO: change the comparison operator instead of setting the sign */
2145 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2146 }
2147
2148 mask = NULL;
2149 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2150 if(terms[chan_index]) {
2151 LLVMValueRef chan_mask;
2152
2153 /*
2154 * If term < 0 then mask = 0 else mask = ~0.
2155 */
2156 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2157
2158 if(mask)
2159 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2160 else
2161 mask = chan_mask;
2162 }
2163 }
2164
2165 if(mask) {
2166 lp_build_mask_update(bld->mask, mask);
2167
2168 if (!near_end_of_shader(bld, pc))
2169 lp_build_mask_check(bld->mask);
2170 }
2171 }
2172
2173
2174 /**
2175 * Unconditional fragment kill.
2176 * The only predication is the execution mask which will apply if
2177 * we're inside a loop or conditional.
2178 */
2179 static void
2180 emit_kill(struct lp_build_tgsi_soa_context *bld,
2181 int pc)
2182 {
2183 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2184 LLVMValueRef mask;
2185
2186 /* For those channels which are "alive", disable fragment shader
2187 * execution.
2188 */
2189 if (bld->exec_mask.has_mask) {
2190 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2191 }
2192 else {
2193 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2194 mask = zero;
2195 }
2196
2197 lp_build_mask_update(bld->mask, mask);
2198
2199 if (!near_end_of_shader(bld, pc))
2200 lp_build_mask_check(bld->mask);
2201 }
2202
2203
2204 /**
2205 * Emit code which will dump the value of all the temporary registers
2206 * to stdout.
2207 */
2208 static void
2209 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
2210 {
2211 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2212 LLVMBuilderRef builder = gallivm->builder;
2213 LLVMValueRef temp_ptr;
2214 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
2215 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
2216 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
2217 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
2218 int index;
2219 int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
2220
2221 for (index = 0; index < n; index++) {
2222 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
2223 LLVMValueRef v[4][4], res;
2224 int chan;
2225
2226 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
2227
2228 for (chan = 0; chan < 4; chan++) {
2229 temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2230 res = LLVMBuildLoad(builder, temp_ptr, "");
2231 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
2232 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
2233 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
2234 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
2235 }
2236
2237 lp_build_printf(gallivm, " X: %f %f %f %f\n",
2238 v[0][0], v[0][1], v[0][2], v[0][3]);
2239 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
2240 v[1][0], v[1][1], v[1][2], v[1][3]);
2241 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
2242 v[2][0], v[2][1], v[2][2], v[2][3]);
2243 lp_build_printf(gallivm, " W: %f %f %f %f\n",
2244 v[3][0], v[3][1], v[3][2], v[3][3]);
2245 }
2246 }
2247
2248
2249
2250 void
2251 lp_emit_declaration_soa(
2252 struct lp_build_tgsi_context *bld_base,
2253 const struct tgsi_full_declaration *decl)
2254 {
2255 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2256 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2257 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2258 const unsigned first = decl->Range.First;
2259 const unsigned last = decl->Range.Last;
2260 unsigned idx, i;
2261
2262 for (idx = first; idx <= last; ++idx) {
2263 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2264 switch (decl->Declaration.File) {
2265 case TGSI_FILE_TEMPORARY:
2266 assert(idx < LP_MAX_TGSI_TEMPS);
2267 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2268 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2269 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2270 }
2271 break;
2272
2273 case TGSI_FILE_OUTPUT:
2274 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2275 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2276 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2277 vec_type, "output");
2278 }
2279 break;
2280
2281 case TGSI_FILE_ADDRESS:
2282 /* ADDR registers are only allocated with an integer LLVM IR type,
2283 * as they are guaranteed to always have integers.
2284 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2285 * an ADDR register for that matter).
2286 */
2287 assert(idx < LP_MAX_TGSI_ADDRS);
2288 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2289 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2290 break;
2291
2292 case TGSI_FILE_PREDICATE:
2293 assert(idx < LP_MAX_TGSI_PREDS);
2294 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2295 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
2296 "predicate");
2297 break;
2298
2299 case TGSI_FILE_SAMPLER_VIEW:
2300 /*
2301 * The target stored here MUST match whatever there actually
2302 * is in the set sampler views (what about return type?).
2303 */
2304 assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2305 bld->sv[idx] = decl->SamplerView;
2306 break;
2307
2308 default:
2309 /* don't need to declare other vars */
2310 break;
2311 }
2312 }
2313 }
2314
2315
2316 void lp_emit_immediate_soa(
2317 struct lp_build_tgsi_context *bld_base,
2318 const struct tgsi_full_immediate *imm)
2319 {
2320 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2321 struct gallivm_state * gallivm = bld_base->base.gallivm;
2322
2323 /* simply copy the immediate values into the next immediates[] slot */
2324 unsigned i;
2325 const uint size = imm->Immediate.NrTokens - 1;
2326 assert(size <= 4);
2327 assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
2328 switch (imm->Immediate.DataType) {
2329 case TGSI_IMM_FLOAT32:
2330 for( i = 0; i < size; ++i )
2331 bld->immediates[bld->num_immediates][i] =
2332 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2333
2334 break;
2335 case TGSI_IMM_UINT32:
2336 for( i = 0; i < size; ++i ) {
2337 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2338 bld->immediates[bld->num_immediates][i] =
2339 LLVMConstBitCast(tmp, bld_base->base.vec_type);
2340 }
2341
2342 break;
2343 case TGSI_IMM_INT32:
2344 for( i = 0; i < size; ++i ) {
2345 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2346 bld->immediates[bld->num_immediates][i] =
2347 LLVMConstBitCast(tmp, bld_base->base.vec_type);
2348 }
2349
2350 break;
2351 }
2352 for( i = size; i < 4; ++i )
2353 bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
2354
2355 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
2356 unsigned index = bld->num_immediates;
2357 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2358 LLVMBuilderRef builder = gallivm->builder;
2359 for (i = 0; i < 4; ++i ) {
2360 LLVMValueRef lindex = lp_build_const_int32(
2361 bld->bld_base.base.gallivm, index * 4 + i);
2362 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2363 bld->imms_array, &lindex, 1, "");
2364 LLVMBuildStore(builder,
2365 bld->immediates[index][i],
2366 imm_ptr);
2367 }
2368 }
2369
2370 bld->num_immediates++;
2371 }
2372
2373 static void
2374 ddx_emit(
2375 const struct lp_build_tgsi_action * action,
2376 struct lp_build_tgsi_context * bld_base,
2377 struct lp_build_emit_data * emit_data)
2378 {
2379 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2380
2381 emit_fetch_deriv(bld, emit_data->args[0], NULL,
2382 &emit_data->output[emit_data->chan], NULL);
2383 }
2384
2385 static void
2386 ddy_emit(
2387 const struct lp_build_tgsi_action * action,
2388 struct lp_build_tgsi_context * bld_base,
2389 struct lp_build_emit_data * emit_data)
2390 {
2391 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2392
2393 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
2394 &emit_data->output[emit_data->chan]);
2395 }
2396
2397 static void
2398 kill_emit(
2399 const struct lp_build_tgsi_action * action,
2400 struct lp_build_tgsi_context * bld_base,
2401 struct lp_build_emit_data * emit_data)
2402 {
2403 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2404
2405 emit_kill(bld, bld_base->pc - 1);
2406 }
2407
2408 static void
2409 kill_if_emit(
2410 const struct lp_build_tgsi_action * action,
2411 struct lp_build_tgsi_context * bld_base,
2412 struct lp_build_emit_data * emit_data)
2413 {
2414 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2415
2416 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
2417 }
2418
2419 static void
2420 tex_emit(
2421 const struct lp_build_tgsi_action * action,
2422 struct lp_build_tgsi_context * bld_base,
2423 struct lp_build_emit_data * emit_data)
2424 {
2425 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2426
2427 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
2428 }
2429
2430 static void
2431 txb_emit(
2432 const struct lp_build_tgsi_action * action,
2433 struct lp_build_tgsi_context * bld_base,
2434 struct lp_build_emit_data * emit_data)
2435 {
2436 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2437
2438 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2439 emit_data->output);
2440 }
2441
2442 static void
2443 txd_emit(
2444 const struct lp_build_tgsi_action * action,
2445 struct lp_build_tgsi_context * bld_base,
2446 struct lp_build_emit_data * emit_data)
2447 {
2448 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2449
2450 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2451 emit_data->output);
2452 }
2453
2454 static void
2455 txl_emit(
2456 const struct lp_build_tgsi_action * action,
2457 struct lp_build_tgsi_context * bld_base,
2458 struct lp_build_emit_data * emit_data)
2459 {
2460 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2461
2462 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2463 emit_data->output);
2464 }
2465
2466 static void
2467 txp_emit(
2468 const struct lp_build_tgsi_action * action,
2469 struct lp_build_tgsi_context * bld_base,
2470 struct lp_build_emit_data * emit_data)
2471 {
2472 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2473
2474 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
2475 emit_data->output);
2476 }
2477
2478 static void
2479 txq_emit(
2480 const struct lp_build_tgsi_action * action,
2481 struct lp_build_tgsi_context * bld_base,
2482 struct lp_build_emit_data * emit_data)
2483 {
2484 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2485
2486 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
2487 }
2488
2489 static void
2490 txf_emit(
2491 const struct lp_build_tgsi_action * action,
2492 struct lp_build_tgsi_context * bld_base,
2493 struct lp_build_emit_data * emit_data)
2494 {
2495 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2496
2497 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
2498 }
2499
2500 static void
2501 sample_i_emit(
2502 const struct lp_build_tgsi_action * action,
2503 struct lp_build_tgsi_context * bld_base,
2504 struct lp_build_emit_data * emit_data)
2505 {
2506 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2507
2508 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
2509 }
2510
2511 static void
2512 sample_emit(
2513 const struct lp_build_tgsi_action * action,
2514 struct lp_build_tgsi_context * bld_base,
2515 struct lp_build_emit_data * emit_data)
2516 {
2517 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2518
2519 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2520 FALSE, emit_data->output);
2521 }
2522
2523 static void
2524 sample_b_emit(
2525 const struct lp_build_tgsi_action * action,
2526 struct lp_build_tgsi_context * bld_base,
2527 struct lp_build_emit_data * emit_data)
2528 {
2529 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2530
2531 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2532 FALSE, emit_data->output);
2533 }
2534
2535 static void
2536 sample_c_emit(
2537 const struct lp_build_tgsi_action * action,
2538 struct lp_build_tgsi_context * bld_base,
2539 struct lp_build_emit_data * emit_data)
2540 {
2541 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2542
2543 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2544 TRUE, emit_data->output);
2545 }
2546
2547 static void
2548 sample_c_lz_emit(
2549 const struct lp_build_tgsi_action * action,
2550 struct lp_build_tgsi_context * bld_base,
2551 struct lp_build_emit_data * emit_data)
2552 {
2553 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2554
2555 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
2556 TRUE, emit_data->output);
2557 }
2558
2559 static void
2560 sample_d_emit(
2561 const struct lp_build_tgsi_action * action,
2562 struct lp_build_tgsi_context * bld_base,
2563 struct lp_build_emit_data * emit_data)
2564 {
2565 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2566
2567 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2568 FALSE, emit_data->output);
2569 }
2570
2571 static void
2572 sample_l_emit(
2573 const struct lp_build_tgsi_action * action,
2574 struct lp_build_tgsi_context * bld_base,
2575 struct lp_build_emit_data * emit_data)
2576 {
2577 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2578
2579 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2580 FALSE, emit_data->output);
2581 }
2582
2583 static void
2584 sviewinfo_emit(
2585 const struct lp_build_tgsi_action * action,
2586 struct lp_build_tgsi_context * bld_base,
2587 struct lp_build_emit_data * emit_data)
2588 {
2589 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2590
2591 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
2592 }
2593
2594 static LLVMValueRef
2595 mask_to_one_vec(struct lp_build_tgsi_context *bld_base)
2596 {
2597 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2598 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2599 LLVMValueRef one_vec = bld_base->int_bld.one;
2600 struct lp_exec_mask *exec_mask = &bld->exec_mask;
2601
2602 if (exec_mask->has_mask) {
2603 one_vec = LLVMBuildAnd(builder, one_vec, exec_mask->exec_mask, "");
2604 }
2605 one_vec = LLVMBuildAnd(builder, one_vec,
2606 lp_build_mask_value(bld->mask), "");
2607 return one_vec;
2608 }
2609
2610 static void
2611 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
2612 LLVMValueRef ptr,
2613 LLVMValueRef mask)
2614 {
2615 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2616
2617 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2618
2619 current_vec = LLVMBuildAdd(builder, current_vec, mask, "");
2620
2621 LLVMBuildStore(builder, current_vec, ptr);
2622 }
2623
2624 static void
2625 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
2626 LLVMValueRef ptr,
2627 LLVMValueRef mask)
2628 {
2629 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2630
2631 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2632 LLVMValueRef full_mask = lp_build_cmp(&bld_base->uint_bld,
2633 PIPE_FUNC_NOTEQUAL,
2634 mask,
2635 bld_base->uint_bld.zero);
2636
2637 current_vec = lp_build_select(&bld_base->uint_bld,
2638 full_mask,
2639 bld_base->uint_bld.zero,
2640 current_vec);
2641
2642 LLVMBuildStore(builder, current_vec, ptr);
2643 }
2644
2645 static LLVMValueRef
2646 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
2647 LLVMValueRef current_mask_vec,
2648 LLVMValueRef total_emitted_vertices_vec)
2649 {
2650 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2651 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
2652 LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS,
2653 total_emitted_vertices_vec,
2654 bld->max_output_vertices_vec);
2655
2656 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
2657 }
2658
2659 static void
2660 emit_vertex(
2661 const struct lp_build_tgsi_action * action,
2662 struct lp_build_tgsi_context * bld_base,
2663 struct lp_build_emit_data * emit_data)
2664 {
2665 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2666 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2667
2668 if (bld->gs_iface->emit_vertex) {
2669 LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
2670 LLVMValueRef total_emitted_vertices_vec =
2671 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
2672 masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
2673 total_emitted_vertices_vec);
2674 gather_outputs(bld);
2675 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
2676 bld->outputs,
2677 total_emitted_vertices_vec);
2678 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
2679 masked_ones);
2680 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
2681 masked_ones);
2682 #if DUMP_GS_EMITS
2683 lp_build_print_value(bld->bld_base.base.gallivm,
2684 " +++ emit vertex masked ones = ",
2685 masked_ones);
2686 lp_build_print_value(bld->bld_base.base.gallivm,
2687 " +++ emit vertex emitted = ",
2688 total_emitted_vertices_vec);
2689 #endif
2690 }
2691 }
2692
2693
2694 static void
2695 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
2696 LLVMValueRef masked_ones)
2697 {
2698 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2699 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2700
2701 if (bld->gs_iface->end_primitive) {
2702 LLVMValueRef emitted_vertices_vec =
2703 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
2704 LLVMValueRef emitted_prims_vec =
2705 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
2706
2707 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
2708 emitted_vertices_vec,
2709 emitted_prims_vec);
2710
2711 #if DUMP_GS_EMITS
2712 lp_build_print_value(bld->bld_base.base.gallivm,
2713 " +++ end prim masked ones = ",
2714 masked_ones);
2715 lp_build_print_value(bld->bld_base.base.gallivm,
2716 " +++ end prim emitted verts1 = ",
2717 emitted_vertices_vec);
2718 lp_build_print_value(bld->bld_base.base.gallivm,
2719 " +++ end prim emitted prims1 = ",
2720 LLVMBuildLoad(builder,
2721 bld->emitted_prims_vec_ptr, ""));
2722 #endif
2723 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
2724 masked_ones);
2725 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
2726 masked_ones);
2727 #if DUMP_GS_EMITS
2728 lp_build_print_value(bld->bld_base.base.gallivm,
2729 " +++ end prim emitted verts2 = ",
2730 LLVMBuildLoad(builder,
2731 bld->emitted_vertices_vec_ptr, ""));
2732 #endif
2733 }
2734
2735 }
2736
2737 static void
2738 end_primitive(
2739 const struct lp_build_tgsi_action * action,
2740 struct lp_build_tgsi_context * bld_base,
2741 struct lp_build_emit_data * emit_data)
2742 {
2743 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2744
2745 if (bld->gs_iface->end_primitive) {
2746 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2747 LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
2748 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2749 LLVMValueRef emitted_verts = LLVMBuildLoad(
2750 builder, bld->emitted_vertices_vec_ptr, "");
2751 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2752 emitted_verts,
2753 uint_bld->zero);
2754 /* We need to combine the current execution mask with the mask
2755 telling us which, if any, execution slots actually have
2756 unemitted primitives, this way we make sure that end_primitives
2757 executes only on the paths that have unflushed vertices */
2758 masked_ones = LLVMBuildAnd(builder, masked_ones, emitted_mask, "");
2759
2760 end_primitive_masked(bld_base, masked_ones);
2761 }
2762 }
2763
2764 static void
2765 cal_emit(
2766 const struct lp_build_tgsi_action * action,
2767 struct lp_build_tgsi_context * bld_base,
2768 struct lp_build_emit_data * emit_data)
2769 {
2770 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2771
2772 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
2773 &bld_base->pc);
2774 }
2775
2776 static void
2777 ret_emit(
2778 const struct lp_build_tgsi_action * action,
2779 struct lp_build_tgsi_context * bld_base,
2780 struct lp_build_emit_data * emit_data)
2781 {
2782 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2783
2784 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
2785 }
2786
2787 static void
2788 brk_emit(
2789 const struct lp_build_tgsi_action * action,
2790 struct lp_build_tgsi_context * bld_base,
2791 struct lp_build_emit_data * emit_data)
2792 {
2793 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2794
2795 lp_exec_break(&bld->exec_mask, bld_base);
2796 }
2797
2798 static void
2799 breakc_emit(
2800 const struct lp_build_tgsi_action * action,
2801 struct lp_build_tgsi_context * bld_base,
2802 struct lp_build_emit_data * emit_data)
2803 {
2804 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2805 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2806 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2807 LLVMValueRef unsigned_cond =
2808 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
2809 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2810 unsigned_cond,
2811 uint_bld->zero);
2812
2813 lp_exec_break_condition(&bld->exec_mask, cond);
2814 }
2815
2816 static void
2817 if_emit(
2818 const struct lp_build_tgsi_action * action,
2819 struct lp_build_tgsi_context * bld_base,
2820 struct lp_build_emit_data * emit_data)
2821 {
2822 LLVMValueRef tmp;
2823 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2824
2825 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
2826 emit_data->args[0], bld->bld_base.base.zero);
2827 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
2828 }
2829
2830 static void
2831 uif_emit(
2832 const struct lp_build_tgsi_action * action,
2833 struct lp_build_tgsi_context * bld_base,
2834 struct lp_build_emit_data * emit_data)
2835 {
2836 LLVMValueRef tmp;
2837 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2838 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2839
2840 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2841 emit_data->args[0], uint_bld->zero);
2842 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
2843 }
2844
2845 static void
2846 case_emit(
2847 const struct lp_build_tgsi_action * action,
2848 struct lp_build_tgsi_context * bld_base,
2849 struct lp_build_emit_data * emit_data)
2850 {
2851 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2852
2853 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
2854 }
2855
2856 static void
2857 default_emit(
2858 const struct lp_build_tgsi_action * action,
2859 struct lp_build_tgsi_context * bld_base,
2860 struct lp_build_emit_data * emit_data)
2861 {
2862 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2863
2864 lp_exec_default(&bld->exec_mask, bld_base);
2865 }
2866
2867 static void
2868 switch_emit(
2869 const struct lp_build_tgsi_action * action,
2870 struct lp_build_tgsi_context * bld_base,
2871 struct lp_build_emit_data * emit_data)
2872 {
2873 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2874
2875 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
2876 }
2877
2878 static void
2879 endswitch_emit(
2880 const struct lp_build_tgsi_action * action,
2881 struct lp_build_tgsi_context * bld_base,
2882 struct lp_build_emit_data * emit_data)
2883 {
2884 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2885
2886 lp_exec_endswitch(&bld->exec_mask, bld_base);
2887 }
2888
2889 static void
2890 bgnloop_emit(
2891 const struct lp_build_tgsi_action * action,
2892 struct lp_build_tgsi_context * bld_base,
2893 struct lp_build_emit_data * emit_data)
2894 {
2895 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2896
2897 lp_exec_bgnloop(&bld->exec_mask);
2898 }
2899
2900 static void
2901 bgnsub_emit(
2902 const struct lp_build_tgsi_action * action,
2903 struct lp_build_tgsi_context * bld_base,
2904 struct lp_build_emit_data * emit_data)
2905 {
2906 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2907
2908 lp_exec_mask_bgnsub(&bld->exec_mask);
2909 }
2910
2911 static void
2912 else_emit(
2913 const struct lp_build_tgsi_action * action,
2914 struct lp_build_tgsi_context * bld_base,
2915 struct lp_build_emit_data * emit_data)
2916 {
2917 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2918
2919 lp_exec_mask_cond_invert(&bld->exec_mask);
2920 }
2921
2922 static void
2923 endif_emit(
2924 const struct lp_build_tgsi_action * action,
2925 struct lp_build_tgsi_context * bld_base,
2926 struct lp_build_emit_data * emit_data)
2927 {
2928 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2929
2930 lp_exec_mask_cond_pop(&bld->exec_mask);
2931 }
2932
2933 static void
2934 endloop_emit(
2935 const struct lp_build_tgsi_action * action,
2936 struct lp_build_tgsi_context * bld_base,
2937 struct lp_build_emit_data * emit_data)
2938 {
2939 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2940
2941 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
2942 }
2943
2944 static void
2945 endsub_emit(
2946 const struct lp_build_tgsi_action * action,
2947 struct lp_build_tgsi_context * bld_base,
2948 struct lp_build_emit_data * emit_data)
2949 {
2950 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2951
2952 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
2953 }
2954
2955 static void
2956 cont_emit(
2957 const struct lp_build_tgsi_action * action,
2958 struct lp_build_tgsi_context * bld_base,
2959 struct lp_build_emit_data * emit_data)
2960 {
2961 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2962
2963 lp_exec_continue(&bld->exec_mask);
2964 }
2965
/* XXX: Refactor and move it to lp_bld_tgsi_action.c
 *
 * XXX: What do the comments about xmm registers mean?  Maybe they are left over
 * from old code, but there is no guarantee that LLVM will use those registers
 * for this code.
 *
 * XXX: There should be no calls to lp_build_emit_fetch in this function.  This
 * should be handled by the emit_data->fetch_args function. */
2974 static void
2975 nrm_emit(
2976 const struct lp_build_tgsi_action * action,
2977 struct lp_build_tgsi_context * bld_base,
2978 struct lp_build_emit_data * emit_data)
2979 {
2980 LLVMValueRef tmp0, tmp1;
2981 LLVMValueRef tmp4 = NULL;
2982 LLVMValueRef tmp5 = NULL;
2983 LLVMValueRef tmp6 = NULL;
2984 LLVMValueRef tmp7 = NULL;
2985 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2986
2987 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2988
2989 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
2990 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
2991 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
2992 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
2993
2994 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2995
2996 /* xmm4 = src.x */
2997 /* xmm0 = src.x * src.x */
2998 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
2999 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
3000 tmp4 = tmp0;
3001 }
3002 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
3003
3004 /* xmm5 = src.y */
3005 /* xmm0 = xmm0 + src.y * src.y */
3006 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
3007 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
3008 tmp5 = tmp1;
3009 }
3010 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
3011 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
3012
3013 /* xmm6 = src.z */
3014 /* xmm0 = xmm0 + src.z * src.z */
3015 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
3016 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
3017 tmp6 = tmp1;
3018 }
3019 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
3020 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
3021
3022 if (dims == 4) {
3023 /* xmm7 = src.w */
3024 /* xmm0 = xmm0 + src.w * src.w */
3025 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
3026 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
3027 tmp7 = tmp1;
3028 }
3029 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
3030 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
3031 }
3032 /* xmm1 = 1 / sqrt(xmm0) */
3033 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
3034 /* dst.x = xmm1 * src.x */
3035 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
3036 emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
3037 }
3038 /* dst.y = xmm1 * src.y */
3039 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
3040 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
3041 }
3042
3043 /* dst.z = xmm1 * src.z */
3044 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
3045 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
3046 }
3047 /* dst.w = xmm1 * src.w */
3048 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
3049 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
3050 }
3051 }
3052
3053 /* dst.w = 1.0 */
3054 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
3055 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
3056 }
3057 }
3058
3059 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
3060 {
3061 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3062 struct gallivm_state * gallivm = bld_base->base.gallivm;
3063
3064 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
3065 LLVMValueRef array_size =
3066 lp_build_const_int32(gallivm,
3067 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
3068 bld->temps_array = lp_build_array_alloca(gallivm,
3069 bld_base->base.vec_type, array_size,
3070 "temp_array");
3071 }
3072
3073 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
3074 LLVMValueRef array_size =
3075 lp_build_const_int32(gallivm,
3076 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
3077 bld->outputs_array = lp_build_array_alloca(gallivm,
3078 bld_base->base.vec_type, array_size,
3079 "output_array");
3080 }
3081
3082 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3083 LLVMValueRef array_size =
3084 lp_build_const_int32(gallivm,
3085 bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
3086 bld->imms_array = lp_build_array_alloca(gallivm,
3087 bld_base->base.vec_type, array_size,
3088 "imms_array");
3089 }
3090
3091 /* If we have indirect addressing in inputs we need to copy them into
3092 * our alloca array to be able to iterate over them */
3093 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
3094 unsigned index, chan;
3095 LLVMTypeRef vec_type = bld_base->base.vec_type;
3096 LLVMValueRef array_size = lp_build_const_int32(gallivm,
3097 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
3098 bld->inputs_array = lp_build_array_alloca(gallivm,
3099 vec_type, array_size,
3100 "input_array");
3101
3102 assert(bld_base->info->num_inputs
3103 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
3104
3105 for (index = 0; index < bld_base->info->num_inputs; ++index) {
3106 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
3107 LLVMValueRef lindex =
3108 lp_build_const_int32(gallivm, index * 4 + chan);
3109 LLVMValueRef input_ptr =
3110 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
3111 &lindex, 1, "");
3112 LLVMValueRef value = bld->inputs[index][chan];
3113 if (value)
3114 LLVMBuildStore(gallivm->builder, value, input_ptr);
3115 }
3116 }
3117 }
3118
3119 if (bld->gs_iface) {
3120 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
3121 bld->emitted_prims_vec_ptr =
3122 lp_build_alloca(gallivm,
3123 uint_bld->vec_type,
3124 "emitted_prims_ptr");
3125 bld->emitted_vertices_vec_ptr =
3126 lp_build_alloca(gallivm,
3127 uint_bld->vec_type,
3128 "emitted_vertices_ptr");
3129 bld->total_emitted_vertices_vec_ptr =
3130 lp_build_alloca(gallivm,
3131 uint_bld->vec_type,
3132 "total_emitted_vertices_ptr");
3133
3134 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3135 bld->emitted_prims_vec_ptr);
3136 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3137 bld->emitted_vertices_vec_ptr);
3138 LLVMBuildStore(gallivm->builder, uint_bld->zero,
3139 bld->total_emitted_vertices_vec_ptr);
3140 }
3141 }
3142
3143 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
3144 {
3145 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3146 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3147
3148 if (0) {
3149 /* for debugging */
3150 emit_dump_temps(bld);
3151 }
3152
3153 /* If we have indirect addressing in outputs we need to copy our alloca array
3154 * to the outputs slots specified by the caller */
3155 if (bld->gs_iface) {
3156 LLVMValueRef total_emitted_vertices_vec;
3157 LLVMValueRef emitted_prims_vec;
3158 /* implicit end_primitives, needed in case there are any unflushed
3159 vertices in the cache */
3160 end_primitive(NULL, bld_base, NULL);
3161
3162 total_emitted_vertices_vec =
3163 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3164 emitted_prims_vec =
3165 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3166
3167 bld->gs_iface->gs_epilogue(bld->gs_iface,
3168 &bld->bld_base,
3169 total_emitted_vertices_vec,
3170 emitted_prims_vec);
3171 } else {
3172 gather_outputs(bld);
3173 }
3174 }
3175
3176 void
3177 lp_build_tgsi_soa(struct gallivm_state *gallivm,
3178 const struct tgsi_token *tokens,
3179 struct lp_type type,
3180 struct lp_build_mask_context *mask,
3181 LLVMValueRef consts_ptr,
3182 const struct lp_bld_tgsi_system_values *system_values,
3183 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
3184 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
3185 struct lp_build_sampler_soa *sampler,
3186 const struct tgsi_shader_info *info,
3187 const struct lp_build_tgsi_gs_iface *gs_iface)
3188 {
3189 struct lp_build_tgsi_soa_context bld;
3190
3191 struct lp_type res_type;
3192
3193 assert(type.length <= LP_MAX_VECTOR_LENGTH);
3194 memset(&res_type, 0, sizeof res_type);
3195 res_type.width = type.width;
3196 res_type.length = type.length;
3197 res_type.sign = 1;
3198
3199 /* Setup build context */
3200 memset(&bld, 0, sizeof bld);
3201 lp_build_context_init(&bld.bld_base.base, gallivm, type);
3202 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
3203 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
3204 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
3205 bld.mask = mask;
3206 bld.inputs = inputs;
3207 bld.outputs = outputs;
3208 bld.consts_ptr = consts_ptr;
3209 bld.sampler = sampler;
3210 bld.bld_base.info = info;
3211 bld.indirect_files = info->indirect_files;
3212
3213 bld.bld_base.soa = TRUE;
3214 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
3215 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
3216 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
3217 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
3218 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
3219 bld.bld_base.emit_store = emit_store;
3220
3221 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
3222 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
3223
3224 bld.bld_base.emit_prologue = emit_prologue;
3225 bld.bld_base.emit_epilogue = emit_epilogue;
3226
3227 /* Set opcode actions */
3228 lp_set_default_actions_cpu(&bld.bld_base);
3229
3230 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
3231 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
3232 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
3233 bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
3234 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
3235 bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
3236 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
3237 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
3238 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
3239 bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
3240 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
3241 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
3242 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
3243 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
3244 bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
3245 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
3246 bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
3247 bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
3248 bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
3249 bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
3250 bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
3251 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
3252 bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
3253 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
3254 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
3255 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
3256 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
3257 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
3258 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
3259 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
3260 /* DX10 sampling ops */
3261 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
3262 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
3263 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
3264 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
3265 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
3266 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
3267 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
3268 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
3269
3270 if (gs_iface) {
3271 /* There's no specific value for this because it should always
3272 * be set, but apps using ext_geometry_shader4 quite often
3273 * were forgetting so we're using MAX_VERTEX_VARYING from
3274 * that spec even though we could debug_assert if it's not
3275 * set, but that's a lot uglier. */
3276 uint max_output_vertices = 32;
3277 uint i = 0;
3278 /* inputs are always indirect with gs */
3279 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
3280 bld.gs_iface = gs_iface;
3281 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
3282 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
3283 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
3284
3285 for (i = 0; i < info->num_properties; ++i) {
3286 if (info->properties[i].name ==
3287 TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
3288 max_output_vertices = info->properties[i].data[0];
3289 }
3290 }
3291 bld.max_output_vertices_vec =
3292 lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
3293 max_output_vertices);
3294 }
3295
3296 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3297
3298 bld.system_values = *system_values;
3299
3300 lp_build_tgsi_llvm(&bld.bld_base, tokens);
3301
3302 if (0) {
3303 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
3304 LLVMValueRef function = LLVMGetBasicBlockParent(block);
3305 debug_printf("11111111111111111111111111111 \n");
3306 tgsi_dump(tokens, 0);
3307 lp_debug_dump_value(function);
3308 debug_printf("2222222222222222222222222222 \n");
3309 }
3310
3311 if (0) {
3312 LLVMModuleRef module = LLVMGetGlobalParent(
3313 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
3314 LLVMDumpModule(module);
3315
3316 }
3317 }