gallivm: fix loops and conditionals within GS
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_tgsi_action.h"
51 #include "lp_bld_type.h"
52 #include "lp_bld_const.h"
53 #include "lp_bld_arit.h"
54 #include "lp_bld_bitarit.h"
55 #include "lp_bld_gather.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_logic.h"
58 #include "lp_bld_swizzle.h"
59 #include "lp_bld_flow.h"
60 #include "lp_bld_quad.h"
61 #include "lp_bld_tgsi.h"
62 #include "lp_bld_limits.h"
63 #include "lp_bld_debug.h"
64 #include "lp_bld_printf.h"
65 #include "lp_bld_sample.h"
66 #include "lp_bld_struct.h"
67
68
69 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
70 {
71 LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
72 LLVMBuilderRef builder = bld->gallivm->builder;
73
74 mask->bld = bld;
75 mask->has_mask = FALSE;
76 mask->ret_in_main = FALSE;
77 mask->cond_stack_size = 0;
78 mask->loop_stack_size = 0;
79 mask->call_stack_size = 0;
80
81 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
82 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
83 LLVMConstAllOnes(mask->int_vec_type);
84
85 mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
86
87 LLVMBuildStore(
88 builder,
89 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
90 mask->loop_limiter);
91 }
92
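/*
 * Combine the condition, continue, break and (when needed) return masks
 * into the single exec_mask used to predicate stores. Outside of a loop
 * only the condition mask matters; inside a loop the continue and break
 * masks are folded in as well.
 */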
93 static void lp_exec_mask_update(struct lp_exec_mask *mask)
94 {
95 LLVMBuilderRef builder = mask->bld->gallivm->builder;
96
97 if (mask->loop_stack_size) {
98 /* For loops we need to update the entire mask at runtime */
99 LLVMValueRef tmp;
100 assert(mask->break_mask);
101 tmp = LLVMBuildAnd(builder,
102 mask->cont_mask,
103 mask->break_mask,
104 "maskcb");
105 mask->exec_mask = LLVMBuildAnd(builder,
106 mask->cond_mask,
107 tmp,
108 "maskfull");
109 } else
110 mask->exec_mask = mask->cond_mask;
111
112 if (mask->call_stack_size || mask->ret_in_main) {
113 mask->exec_mask = LLVMBuildAnd(builder,
114 mask->exec_mask,
115 mask->ret_mask,
116 "callmask");
117 }
118
119 mask->has_mask = (mask->cond_stack_size > 0 ||
120 mask->loop_stack_size > 0 ||
121 mask->call_stack_size > 0 ||
122 mask->ret_in_main);
123 }
124
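/*
 * IF: push the current condition mask and AND it with the new condition,
 * so only lanes that were already active and pass the test stay enabled.
 */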
125 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
126 LLVMValueRef val)
127 {
128 LLVMBuilderRef builder = mask->bld->gallivm->builder;
129
130 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
131 if (mask->cond_stack_size == 0) {
132 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
133 }
134 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
135 assert(LLVMTypeOf(val) == mask->int_vec_type);
136 mask->cond_mask = LLVMBuildAnd(builder,
137 mask->cond_mask,
138 val,
139 "");
140 lp_exec_mask_update(mask);
141 }
142
143 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
144 {
145 LLVMBuilderRef builder = mask->bld->gallivm->builder;
146 LLVMValueRef prev_mask;
147 LLVMValueRef inv_mask;
148
149 assert(mask->cond_stack_size);
150 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
151 if (mask->cond_stack_size == 1) {
152 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
153 }
154
155 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
156
157 mask->cond_mask = LLVMBuildAnd(builder,
158 inv_mask,
159 prev_mask, "");
160 lp_exec_mask_update(mask);
161 }
162
163 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
164 {
165 assert(mask->cond_stack_size);
166 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
167 lp_exec_mask_update(mask);
168 }
169
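/*
 * BGNLOOP: save the enclosing loop state, allocate a break variable so the
 * break mask survives across iterations, and start a new basic block for
 * the loop body.
 */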
170 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
171 {
172 LLVMBuilderRef builder = mask->bld->gallivm->builder;
173
174 if (mask->loop_stack_size == 0) {
175 assert(mask->loop_block == NULL);
176 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
177 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
178 assert(mask->break_var == NULL);
179 }
180
181 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
182
183 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
184 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
185 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
186 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
187 ++mask->loop_stack_size;
188
189 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
190 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
191
192 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
193
194 LLVMBuildBr(builder, mask->loop_block);
195 LLVMPositionBuilderAtEnd(builder, mask->loop_block);
196
197 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
198
199 lp_exec_mask_update(mask);
200 }
201
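/*
 * BRK: clear the break mask for all currently active lanes; they stay
 * disabled until the loop is exited and the previous mask is restored.
 */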
202 static void lp_exec_break(struct lp_exec_mask *mask)
203 {
204 LLVMBuilderRef builder = mask->bld->gallivm->builder;
205 LLVMValueRef exec_mask = LLVMBuildNot(builder,
206 mask->exec_mask,
207 "break");
208
209 mask->break_mask = LLVMBuildAnd(builder,
210 mask->break_mask,
211 exec_mask, "break_full");
212
213 lp_exec_mask_update(mask);
214 }
215
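/*
 * BREAKC: like BRK, but only lanes that are active *and* for which the
 * condition is true are removed from the break mask.
 */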
216 static void lp_exec_break_condition(struct lp_exec_mask *mask,
217 LLVMValueRef cond)
218 {
219 LLVMBuilderRef builder = mask->bld->gallivm->builder;
220 LLVMValueRef cond_mask = LLVMBuildAnd(builder,
221 mask->exec_mask,
222 cond, "cond_mask");
223 cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
224
225 mask->break_mask = LLVMBuildAnd(builder,
226 mask->break_mask,
227 cond_mask, "breakc_full");
228
229 lp_exec_mask_update(mask);
230 }
231
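/*
 * CONT: clear the continue mask for the active lanes; it is restored from
 * the loop stack when the end of the loop body is reached.
 */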
232 static void lp_exec_continue(struct lp_exec_mask *mask)
233 {
234 LLVMBuilderRef builder = mask->bld->gallivm->builder;
235 LLVMValueRef exec_mask = LLVMBuildNot(builder,
236 mask->exec_mask,
237 "");
238
239 mask->cont_mask = LLVMBuildAnd(builder,
240 mask->cont_mask,
241 exec_mask, "");
242
243 lp_exec_mask_update(mask);
244 }
245
246
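/*
 * ENDLOOP: restore the continue mask, persist the break mask across the
 * back edge, and branch back to the loop header as long as at least one
 * lane is still active and the iteration limiter has not expired.
 */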
247 static void lp_exec_endloop(struct gallivm_state *gallivm,
248 struct lp_exec_mask *mask)
249 {
250 LLVMBuilderRef builder = mask->bld->gallivm->builder;
251 LLVMBasicBlockRef endloop;
252 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
253 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
254 mask->bld->type.width *
255 mask->bld->type.length);
256 LLVMValueRef i1cond, i2cond, icond, limiter;
257
258 assert(mask->break_mask);
259
260 /*
261 * Restore the cont_mask, but don't pop
262 */
263 assert(mask->loop_stack_size);
264 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
265 lp_exec_mask_update(mask);
266
267 /*
268 * Unlike the continue mask, the break_mask must be preserved across loop
269 * iterations
270 */
271 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
272
273 /* Decrement the loop limiter */
274 limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
275
276 limiter = LLVMBuildSub(
277 builder,
278 limiter,
279 LLVMConstInt(int_type, 1, false),
280 "");
281
282 LLVMBuildStore(builder, limiter, mask->loop_limiter);
283
284 /* i1cond = (mask != 0) */
285 i1cond = LLVMBuildICmp(
286 builder,
287 LLVMIntNE,
288 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
289 LLVMConstNull(reg_type), "i1cond");
290
291 /* i2cond = (looplimiter > 0) */
292 i2cond = LLVMBuildICmp(
293 builder,
294 LLVMIntSGT,
295 limiter,
296 LLVMConstNull(int_type), "i2cond");
297
298 /* if( i1cond && i2cond ) */
299 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
300
301 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
302
303 LLVMBuildCondBr(builder,
304 icond, mask->loop_block, endloop);
305
306 LLVMPositionBuilderAtEnd(builder, endloop);
307
308 assert(mask->loop_stack_size);
309 --mask->loop_stack_size;
310 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
311 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
312 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
313 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
314
315 lp_exec_mask_update(mask);
316 }
317
318 /* stores val into an address pointed to by dst.
319 * mask->exec_mask is used to figure out which bits of val
320 * should be stored into the address
321 * (0 means don't store this bit, 1 means do store).
322 */
323 static void lp_exec_mask_store(struct lp_exec_mask *mask,
324 struct lp_build_context *bld_store,
325 LLVMValueRef pred,
326 LLVMValueRef val,
327 LLVMValueRef dst)
328 {
329 LLVMBuilderRef builder = mask->bld->gallivm->builder;
330
331 /* Mix the predicate and execution mask */
332 if (mask->has_mask) {
333 if (pred) {
334 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
335 } else {
336 pred = mask->exec_mask;
337 }
338 }
339
340 if (pred) {
341 LLVMValueRef real_val, dst_val;
342
343 dst_val = LLVMBuildLoad(builder, dst, "");
344 real_val = lp_build_select(bld_store,
345 pred,
346 val, dst_val);
347
348 LLVMBuildStore(builder, real_val, dst);
349 } else
350 LLVMBuildStore(builder, val, dst);
351 }
352
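/*
 * CAL: remember the return address and the current return mask, then
 * transfer control by rewriting the translator's program counter.
 */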
353 static void lp_exec_mask_call(struct lp_exec_mask *mask,
354 int func,
355 int *pc)
356 {
357 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
358 mask->call_stack[mask->call_stack_size].pc = *pc;
359 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
360 mask->call_stack_size++;
361 *pc = func;
362 }
363
364 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
365 {
366 LLVMBuilderRef builder = mask->bld->gallivm->builder;
367 LLVMValueRef exec_mask;
368
369 if (mask->cond_stack_size == 0 &&
370 mask->loop_stack_size == 0 &&
371 mask->call_stack_size == 0) {
372 /* returning from main() */
373 *pc = -1;
374 return;
375 }
376
377 if (mask->call_stack_size == 0) {
378 /*
379 * This requires special handling since we need to ensure
380 * we don't drop the mask even if we have no call stack
381 * (e.g. after a ret in a if clause after the endif)
382 */
383 mask->ret_in_main = TRUE;
384 }
385
386 exec_mask = LLVMBuildNot(builder,
387 mask->exec_mask,
388 "ret");
389
390 mask->ret_mask = LLVMBuildAnd(builder,
391 mask->ret_mask,
392 exec_mask, "ret_full");
393
394 lp_exec_mask_update(mask);
395 }
396
397 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
398 {
399 }
400
401 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
402 {
403 assert(mask->call_stack_size);
404 mask->call_stack_size--;
405 *pc = mask->call_stack[mask->call_stack_size].pc;
406 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
407 lp_exec_mask_update(mask);
408 }
409
410
411 /**
412 * Return pointer to a temporary register channel (src or dest).
413 * Note that indirect addressing cannot be handled here.
414 * \param index which temporary register
415 * \param chan which channel of the temp register.
416 */
417 LLVMValueRef
418 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
419 unsigned index,
420 unsigned chan)
421 {
422 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
423 assert(chan < 4);
424 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
425 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
426 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
427 }
428 else {
429 return bld->temps[index][chan];
430 }
431 }
432
433 /**
434 * Return pointer to an output register channel (src or dest).
435 * Note that indirect addressing cannot be handled here.
436 * \param index which output register
437 * \param chan which channel of the output register.
438 */
439 LLVMValueRef
440 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
441 unsigned index,
442 unsigned chan)
443 {
444 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
445 assert(chan < 4);
446 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
447 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
448 index * 4 + chan);
449 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
450 }
451 else {
452 return bld->outputs[index][chan];
453 }
454 }
455
456 /*
457 * If we have indirect addressing in outputs copy our alloca array
458 * to the outputs slots specified by the caller to make sure
459 * our outputs are delivered consistently via the same interface.
460 */
461 static void
462 gather_outputs(struct lp_build_tgsi_soa_context * bld)
463 {
464 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
465 unsigned index, chan;
466 assert(bld->bld_base.info->num_outputs <=
467 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
468 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
469 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
470 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
471 }
472 }
473 }
474 }
475
476 /**
477 * Gather vector.
478 * XXX the lp_build_gather() function should be capable of doing this
479 * with a little work.
480 */
481 static LLVMValueRef
482 build_gather(struct lp_build_context *bld,
483 LLVMValueRef base_ptr,
484 LLVMValueRef indexes)
485 {
486 LLVMBuilderRef builder = bld->gallivm->builder;
487 LLVMValueRef res = bld->undef;
488 unsigned i;
489
490 /*
491 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
492 */
493 for (i = 0; i < bld->type.length; i++) {
494 LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
495 LLVMValueRef index = LLVMBuildExtractElement(builder,
496 indexes, ii, "");
497 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
498 &index, 1, "gather_ptr");
499 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
500
501 res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
502 }
503
504 return res;
505 }
506
507
508 /**
509 * Scatter/store vector.
510 */
511 static void
512 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
513 LLVMValueRef base_ptr,
514 LLVMValueRef indexes,
515 LLVMValueRef values,
516 struct lp_exec_mask *mask,
517 LLVMValueRef pred)
518 {
519 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
520 LLVMBuilderRef builder = gallivm->builder;
521 unsigned i;
522
523 /* Mix the predicate and execution mask */
524 if (mask->has_mask) {
525 if (pred) {
526 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
527 }
528 else {
529 pred = mask->exec_mask;
530 }
531 }
532
533 /*
534 * Loop over elements of index_vec, store scalar value.
535 */
536 for (i = 0; i < bld->bld_base.base.type.length; i++) {
537 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
538 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
539 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
540 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
541 LLVMValueRef scalar_pred = pred ?
542 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
543
544 if (0)
545 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
546 ii, val, index, scalar_ptr);
547
548 if (scalar_pred) {
549 LLVMValueRef real_val, dst_val;
550 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
551 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
552 LLVMBuildStore(builder, real_val, scalar_ptr);
553 }
554 else {
555 LLVMBuildStore(builder, val, scalar_ptr);
556 }
557 }
558 }
559
560
561 /**
562 * Read the current value of the ADDR register, convert the floats to
563 * ints, add the base index and return the vector of offsets.
564 * The offsets will be used to index into the constant buffer or
565 * temporary register file.
566 */
567 static LLVMValueRef
568 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
569 unsigned reg_file, unsigned reg_index,
570 const struct tgsi_ind_register *indirect_reg)
571 {
572 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
573 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
574 /* always use X component of address register */
575 unsigned swizzle = indirect_reg->Swizzle;
576 LLVMValueRef base;
577 LLVMValueRef rel;
578 LLVMValueRef max_index;
579 LLVMValueRef index;
580
581 assert(bld->indirect_files & (1 << reg_file));
582
583 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
584
585 assert(swizzle < 4);
586 switch (indirect_reg->File) {
587 case TGSI_FILE_ADDRESS:
588 rel = LLVMBuildLoad(builder,
589 bld->addr[indirect_reg->Index][swizzle],
590 "load addr reg");
591 /* ADDR LLVM values already have LLVM integer type. */
592 break;
593 case TGSI_FILE_TEMPORARY:
594 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
595 rel = LLVMBuildLoad(builder, rel, "load temp reg");
596 /* TEMP LLVM values always have LLVM float type, but for indirection, the
597 * value actually stored is expected to be an integer */
598 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
599 break;
600 default:
601 assert(0);
602 rel = uint_bld->zero;
603 }
604
605 index = lp_build_add(uint_bld, base, rel);
606
607 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
608 uint_bld->type,
609 bld->bld_base.info->file_max[reg_file]);
610
611 assert(!uint_bld->type.sign);
612 index = lp_build_min(uint_bld, index, max_index);
613
614 return index;
615 }
616
617 static struct lp_build_context *
618 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
619 enum tgsi_opcode_type stype)
620 {
621 struct lp_build_context *bld_fetch;
622
623 switch (stype) {
624 case TGSI_TYPE_FLOAT:
625 case TGSI_TYPE_UNTYPED:
626 bld_fetch = &bld_base->base;
627 break;
628 case TGSI_TYPE_UNSIGNED:
629 bld_fetch = &bld_base->uint_bld;
630 break;
631 case TGSI_TYPE_SIGNED:
632 bld_fetch = &bld_base->int_bld;
633 break;
634 case TGSI_TYPE_VOID:
635 case TGSI_TYPE_DOUBLE:
636 default:
637 assert(0);
638 bld_fetch = NULL;
639 break;
640 }
641 return bld_fetch;
642 }
643
644 static LLVMValueRef
645 emit_fetch_constant(
646 struct lp_build_tgsi_context * bld_base,
647 const struct tgsi_full_src_register * reg,
648 enum tgsi_opcode_type stype,
649 unsigned swizzle)
650 {
651 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
652 struct gallivm_state *gallivm = bld_base->base.gallivm;
653 LLVMBuilderRef builder = gallivm->builder;
654 struct lp_build_context *uint_bld = &bld_base->uint_bld;
655 LLVMValueRef indirect_index = NULL;
656 unsigned dimension = 0;
657 LLVMValueRef dimension_index;
658 LLVMValueRef consts_ptr;
659 LLVMValueRef res;
660
661 /* XXX: Handle fetching xyzw components as a vector */
662 assert(swizzle != ~0);
663
664 if (reg->Register.Dimension) {
665 assert(!reg->Dimension.Indirect);
666 dimension = reg->Dimension.Index;
667 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
668 }
669
670 dimension_index = lp_build_const_int32(gallivm, dimension);
671 consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);
672
673 if (reg->Register.Indirect) {
674 indirect_index = get_indirect_index(bld,
675 reg->Register.File,
676 reg->Register.Index,
677 &reg->Indirect);
678 }
679
680 if (reg->Register.Indirect) {
681 LLVMValueRef swizzle_vec =
682 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
683 LLVMValueRef index_vec; /* index into the const buffer */
684
685 /* index_vec = indirect_index * 4 + swizzle */
686 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
687 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
688
689 /* Gather values from the constant buffer */
690 res = build_gather(&bld_base->base, consts_ptr, index_vec);
691 }
692 else {
693 LLVMValueRef index; /* index into the const buffer */
694 LLVMValueRef scalar, scalar_ptr;
695
696 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
697
698 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
699 &index, 1, "");
700 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
701 res = lp_build_broadcast_scalar(&bld_base->base, scalar);
702 }
703
704 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
705 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
706 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
707 }
708 return res;
709 }
710
711 static LLVMValueRef
712 emit_fetch_immediate(
713 struct lp_build_tgsi_context * bld_base,
714 const struct tgsi_full_src_register * reg,
715 enum tgsi_opcode_type stype,
716 unsigned swizzle)
717 {
718 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
719 LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
720 assert(res);
721
722 if (stype == TGSI_TYPE_UNSIGNED) {
723 res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
724 } else if (stype == TGSI_TYPE_SIGNED) {
725 res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
726 }
727 return res;
728 }
729
730 static LLVMValueRef
731 emit_fetch_input(
732 struct lp_build_tgsi_context * bld_base,
733 const struct tgsi_full_src_register * reg,
734 enum tgsi_opcode_type stype,
735 unsigned swizzle)
736 {
737 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
738 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
739 LLVMBuilderRef builder = gallivm->builder;
740 struct lp_build_context *uint_bld = &bld_base->uint_bld;
741 LLVMValueRef indirect_index = NULL;
742 LLVMValueRef res;
743
744 if (reg->Register.Indirect) {
745 indirect_index = get_indirect_index(bld,
746 reg->Register.File,
747 reg->Register.Index,
748 &reg->Indirect);
749 }
750
751 if (reg->Register.Indirect) {
752 LLVMValueRef swizzle_vec =
753 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
754 LLVMValueRef length_vec =
755 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
756 LLVMValueRef index_vec; /* index into the input array */
757 LLVMValueRef inputs_array;
758 LLVMTypeRef float4_ptr_type;
759
760 /* index_vec = (indirect_index * 4 + swizzle) * length */
761 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
762 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
763 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
764
765 /* cast inputs_array pointer to float* */
766 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
767 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
768 float4_ptr_type, "");
769
770 /* Gather values from the input register array */
771 res = build_gather(&bld_base->base, inputs_array, index_vec);
772 } else {
773 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
774 LLVMValueRef lindex = lp_build_const_int32(gallivm,
775 reg->Register.Index * 4 + swizzle);
776 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
777 bld->inputs_array, &lindex, 1, "");
778 res = LLVMBuildLoad(builder, input_ptr, "");
779 }
780 else {
781 res = bld->inputs[reg->Register.Index][swizzle];
782 }
783 }
784
785 assert(res);
786
787 if (stype == TGSI_TYPE_UNSIGNED) {
788 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
789 } else if (stype == TGSI_TYPE_SIGNED) {
790 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
791 }
792
793 return res;
794 }
795
796
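/*
 * Fetch a geometry shader input. GS inputs are two-dimensional (vertex and
 * attribute), so the value is obtained through the gs_iface callback using
 * the vertex index, attribute index and swizzle.
 */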
797 static LLVMValueRef
798 emit_fetch_gs_input(
799 struct lp_build_tgsi_context * bld_base,
800 const struct tgsi_full_src_register * reg,
801 enum tgsi_opcode_type stype,
802 unsigned swizzle)
803 {
804 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
805 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
806 LLVMBuilderRef builder = gallivm->builder;
807 LLVMValueRef attrib_index = NULL;
808 LLVMValueRef vertex_index = NULL;
809 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
810 LLVMValueRef res;
811
812 if (reg->Register.Indirect) {
813 attrib_index = get_indirect_index(bld,
814 reg->Register.File,
815 reg->Register.Index,
816 &reg->Indirect);
817 } else {
818 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
819 }
820
821 if (reg->Dimension.Indirect) {
822 vertex_index = get_indirect_index(bld,
823 reg->Register.File,
824 reg->Dimension.Index,
825 &reg->DimIndirect);
826 } else {
827 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
828 }
829
830
831 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
832 vertex_index, attrib_index,
833 swizzle_index);
834
835 assert(res);
836
837 if (stype == TGSI_TYPE_UNSIGNED) {
838 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
839 } else if (stype == TGSI_TYPE_SIGNED) {
840 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
841 }
842
843 return res;
844 }
845
846 static LLVMValueRef
847 emit_fetch_temporary(
848 struct lp_build_tgsi_context * bld_base,
849 const struct tgsi_full_src_register * reg,
850 enum tgsi_opcode_type stype,
851 unsigned swizzle)
852 {
853 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
854 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
855 LLVMBuilderRef builder = gallivm->builder;
856 struct lp_build_context *uint_bld = &bld_base->uint_bld;
857 LLVMValueRef indirect_index = NULL;
858 LLVMValueRef res;
859
860 if (reg->Register.Indirect) {
861 indirect_index = get_indirect_index(bld,
862 reg->Register.File,
863 reg->Register.Index,
864 &reg->Indirect);
865 }
866
867 if (reg->Register.Indirect) {
868 LLVMValueRef swizzle_vec =
869 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
870 LLVMValueRef length_vec =
871 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
872 bld->bld_base.base.type.length);
873 LLVMValueRef index_vec; /* index into the temp register array */
874 LLVMValueRef temps_array;
875 LLVMTypeRef float4_ptr_type;
876
877 /* index_vec = (indirect_index * 4 + swizzle) * length */
878 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
879 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
880 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
881
882 /* cast temps_array pointer to float* */
883 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
884 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
885 float4_ptr_type, "");
886
887 /* Gather values from the temporary register array */
888 res = build_gather(&bld_base->base, temps_array, index_vec);
889 }
890 else {
891 LLVMValueRef temp_ptr;
892 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
893 res = LLVMBuildLoad(builder, temp_ptr, "");
894 }
895
896 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
897 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
898 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
899 }
900
901 return res;
902 }
903
904 static LLVMValueRef
905 emit_fetch_system_value(
906 struct lp_build_tgsi_context * bld_base,
907 const struct tgsi_full_src_register * reg,
908 enum tgsi_opcode_type stype,
909 unsigned swizzle)
910 {
911 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
912 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
913 const struct tgsi_shader_info *info = bld->bld_base.info;
914 LLVMBuilderRef builder = gallivm->builder;
915 LLVMValueRef res;
916 enum tgsi_opcode_type atype; // Actual type of the value
917
918 assert(!reg->Register.Indirect);
919
920 switch (info->system_value_semantic_name[reg->Register.Index]) {
921 case TGSI_SEMANTIC_INSTANCEID:
922 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
923 atype = TGSI_TYPE_UNSIGNED;
924 break;
925
926 case TGSI_SEMANTIC_VERTEXID:
927 res = bld->system_values.vertex_id;
928 atype = TGSI_TYPE_UNSIGNED;
929 break;
930
931 case TGSI_SEMANTIC_PRIMID:
932 res = bld->system_values.prim_id;
933 atype = TGSI_TYPE_UNSIGNED;
934 break;
935
936 default:
937 assert(!"unexpected semantic in emit_fetch_system_value");
938 res = bld_base->base.zero;
939 atype = TGSI_TYPE_FLOAT;
940 break;
941 }
942
943 if (atype != stype) {
944 if (stype == TGSI_TYPE_FLOAT) {
945 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
946 } else if (stype == TGSI_TYPE_UNSIGNED) {
947 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
948 } else if (stype == TGSI_TYPE_SIGNED) {
949 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
950 }
951 }
952
953 return res;
954 }
955
956 /**
957 * Register fetch with derivatives.
958 */
959 static void
960 emit_fetch_deriv(
961 struct lp_build_tgsi_soa_context *bld,
962 LLVMValueRef src,
963 LLVMValueRef *res,
964 LLVMValueRef *ddx,
965 LLVMValueRef *ddy)
966 {
967 if(res)
968 *res = src;
969
970 /* TODO: use interpolation coeffs for inputs */
971
972 if(ddx)
973 *ddx = lp_build_ddx(&bld->bld_base.base, src);
974
975 if(ddy)
976 *ddy = lp_build_ddy(&bld->bld_base.base, src);
977 }
978
979
980 /**
981 * Predicate.
982 */
983 static void
984 emit_fetch_predicate(
985 struct lp_build_tgsi_soa_context *bld,
986 const struct tgsi_full_instruction *inst,
987 LLVMValueRef *pred)
988 {
989 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
990 unsigned index;
991 unsigned char swizzles[4];
992 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
993 LLVMValueRef value;
994 unsigned chan;
995
996 if (!inst->Instruction.Predicate) {
997 TGSI_FOR_EACH_CHANNEL( chan ) {
998 pred[chan] = NULL;
999 }
1000 return;
1001 }
1002
1003 swizzles[0] = inst->Predicate.SwizzleX;
1004 swizzles[1] = inst->Predicate.SwizzleY;
1005 swizzles[2] = inst->Predicate.SwizzleZ;
1006 swizzles[3] = inst->Predicate.SwizzleW;
1007
1008 index = inst->Predicate.Index;
1009 assert(index < LP_MAX_TGSI_PREDS);
1010
1011 TGSI_FOR_EACH_CHANNEL( chan ) {
1012 unsigned swizzle = swizzles[chan];
1013
1014 /*
1015 * Only fetch the predicate register channels that are actually listed
1016 * in the swizzles
1017 */
1018 if (!unswizzled[swizzle]) {
1019 value = LLVMBuildLoad(builder,
1020 bld->preds[index][swizzle], "");
1021
1022 /*
1023 * Convert the value to an integer mask.
1024 *
1025 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
1026 * is needlessly causing two comparisons due to storing the intermediate
1027 * result as float vector instead of an integer mask vector.
1028 */
1029 value = lp_build_compare(bld->bld_base.base.gallivm,
1030 bld->bld_base.base.type,
1031 PIPE_FUNC_NOTEQUAL,
1032 value,
1033 bld->bld_base.base.zero);
1034 if (inst->Predicate.Negate) {
1035 value = LLVMBuildNot(builder, value, "");
1036 }
1037
1038 unswizzled[swizzle] = value;
1039 } else {
1040 value = unswizzled[swizzle];
1041 }
1042
1043 pred[chan] = value;
1044 }
1045 }
1046
1047 /**
1048 * Register store.
1049 */
1050 static void
1051 emit_store_chan(
1052 struct lp_build_tgsi_context *bld_base,
1053 const struct tgsi_full_instruction *inst,
1054 unsigned index,
1055 unsigned chan_index,
1056 LLVMValueRef pred,
1057 LLVMValueRef value)
1058 {
1059 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1060 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1061 LLVMBuilderRef builder = gallivm->builder;
1062 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1063 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1064 LLVMValueRef indirect_index = NULL;
1065 struct lp_build_context *bld_store;
1066 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1067
1068 switch (dtype) {
1069 default:
1070 case TGSI_TYPE_FLOAT:
1071 case TGSI_TYPE_UNTYPED:
1072 bld_store = &bld_base->base;
1073 break;
1074 case TGSI_TYPE_UNSIGNED:
1075 bld_store = &bld_base->uint_bld;
1076 break;
1077 case TGSI_TYPE_SIGNED:
1078 bld_store = &bld_base->int_bld;
1079 break;
1080 case TGSI_TYPE_DOUBLE:
1081 case TGSI_TYPE_VOID:
1082 assert(0);
1083 bld_store = NULL;
1084 break;
1085 }
1086
1087 switch( inst->Instruction.Saturate ) {
1088 case TGSI_SAT_NONE:
1089 break;
1090
1091 case TGSI_SAT_ZERO_ONE:
1092 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
1093 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
1094 break;
1095
1096 case TGSI_SAT_MINUS_PLUS_ONE:
1097 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
1098 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
1099 break;
1100
1101 default:
1102 assert(0);
1103 }
1104
1105 if (reg->Register.Indirect) {
1106 indirect_index = get_indirect_index(bld,
1107 reg->Register.File,
1108 reg->Register.Index,
1109 &reg->Indirect);
1110 } else {
1111 assert(reg->Register.Index <=
1112 bld->bld_base.info->file_max[reg->Register.File]);
1113 }
1114
1115 switch( reg->Register.File ) {
1116 case TGSI_FILE_OUTPUT:
1117 if (reg->Register.Indirect) {
1118 LLVMValueRef chan_vec =
1119 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1120 LLVMValueRef length_vec =
1121 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
1122 LLVMValueRef index_vec; /* indexes into the output registers */
1123 LLVMValueRef outputs_array;
1124 LLVMValueRef pixel_offsets;
1125 LLVMTypeRef float_ptr_type;
1126 int i;
1127
1128 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1129 pixel_offsets = uint_bld->undef;
1130 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1131 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1132 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1133 ii, ii, "");
1134 }
1135
1136 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1137 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1138 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1139 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1140 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1141
1142 float_ptr_type =
1143 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1144 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
1145 float_ptr_type, "");
1146
1147 /* Scatter store values into the output registers */
1148 emit_mask_scatter(bld, outputs_array, index_vec, value,
1149 &bld->exec_mask, pred);
1150 }
1151 else {
1152 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1153 chan_index);
1154 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
1155 }
1156 break;
1157
1158 case TGSI_FILE_TEMPORARY:
1159 if (reg->Register.Indirect) {
1160 LLVMValueRef chan_vec =
1161 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1162 LLVMValueRef length_vec =
1163 lp_build_const_int_vec(gallivm, uint_bld->type,
1164 bld->bld_base.base.type.length);
1165 LLVMValueRef index_vec; /* indexes into the temp registers */
1166 LLVMValueRef temps_array;
1167 LLVMValueRef pixel_offsets;
1168 LLVMTypeRef float_ptr_type;
1169 int i;
1170
1171 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1172 pixel_offsets = uint_bld->undef;
1173 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1174 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1175 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1176 ii, ii, "");
1177 }
1178
1179 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1180 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1181 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1182 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1183 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1184
1185 float_ptr_type =
1186 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1187 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1188 float_ptr_type, "");
1189
1190 /* Scatter store values into temp registers */
1191 emit_mask_scatter(bld, temps_array, index_vec, value,
1192 &bld->exec_mask, pred);
1193 }
1194 else {
1195 LLVMValueRef temp_ptr;
1196
1197 switch (dtype) {
1198 case TGSI_TYPE_UNSIGNED:
1199 case TGSI_TYPE_SIGNED: {
1200 LLVMTypeRef itype = bld_base->int_bld.vec_type;
1201 LLVMTypeRef ivtype = LLVMPointerType(itype, 0);
1202 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1203 chan_index);
1204 LLVMValueRef temp_value_ptr;
1205
1206 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, "");
1207 temp_value_ptr = LLVMBuildBitCast(builder, value, itype, "");
1208 value = temp_value_ptr;
1209 break;
1210 }
1211 default:
1212 case TGSI_TYPE_FLOAT:
1213 case TGSI_TYPE_UNTYPED:
1214 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1215 chan_index);
1216 break;
1217 }
1218
1219 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr);
1220 }
1221 break;
1222
1223 case TGSI_FILE_ADDRESS:
1224 assert(dtype == TGSI_TYPE_SIGNED);
1225 assert(LLVMTypeOf(value) == bld_base->base.int_vec_type);
1226 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1227 bld->addr[reg->Register.Index][chan_index]);
1228 break;
1229
1230 case TGSI_FILE_PREDICATE:
1231 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1232 bld->preds[reg->Register.Index][chan_index]);
1233 break;
1234
1235 default:
1236 assert( 0 );
1237 }
1238 }
1239
1240 static void
1241 emit_store(
1242 struct lp_build_tgsi_context * bld_base,
1243 const struct tgsi_full_instruction * inst,
1244 const struct tgsi_opcode_info * info,
1245 LLVMValueRef dst[4])
1246
1247 {
1248 unsigned chan_index;
1249 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1250
1251 if(info->num_dst) {
1252 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1253
1254 emit_fetch_predicate( bld, inst, pred );
1255
1256 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1257 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1258 }
1259 }
1260 }
1261
1262 /**
1263 * High-level instruction translators.
1264 */
1265
1266 static void
1267 emit_tex( struct lp_build_tgsi_soa_context *bld,
1268 const struct tgsi_full_instruction *inst,
1269 enum lp_build_tex_modifier modifier,
1270 LLVMValueRef *texel)
1271 {
1272 unsigned unit;
1273 LLVMValueRef lod_bias, explicit_lod;
1274 LLVMValueRef oow = NULL;
1275 LLVMValueRef coords[4];
1276 LLVMValueRef offsets[3] = { NULL };
1277 struct lp_derivatives derivs;
1278 struct lp_derivatives *deriv_ptr = NULL;
1279 unsigned num_coords, num_derivs, num_offsets;
1280 unsigned i;
1281
1282 if (!bld->sampler) {
1283 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1284 for (i = 0; i < 4; i++) {
1285 texel[i] = bld->bld_base.base.undef;
1286 }
1287 return;
1288 }
1289
1290 switch (inst->Texture.Texture) {
1291 case TGSI_TEXTURE_1D:
1292 num_coords = 1;
1293 num_offsets = 1;
1294 num_derivs = 1;
1295 break;
1296 case TGSI_TEXTURE_1D_ARRAY:
1297 num_coords = 2;
1298 num_offsets = 1;
1299 num_derivs = 1;
1300 break;
1301 case TGSI_TEXTURE_2D:
1302 case TGSI_TEXTURE_RECT:
1303 num_coords = 2;
1304 num_offsets = 2;
1305 num_derivs = 2;
1306 break;
1307 case TGSI_TEXTURE_SHADOW1D:
1308 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1309 num_coords = 3;
1310 num_offsets = 1;
1311 num_derivs = 1;
1312 break;
1313 case TGSI_TEXTURE_SHADOW2D:
1314 case TGSI_TEXTURE_SHADOWRECT:
1315 case TGSI_TEXTURE_2D_ARRAY:
1316 num_coords = 3;
1317 num_offsets = 2;
1318 num_derivs = 2;
1319 break;
1320 case TGSI_TEXTURE_CUBE:
1321 num_coords = 3;
1322 num_offsets = 2;
1323 num_derivs = 3;
1324 break;
1325 case TGSI_TEXTURE_3D:
1326 num_coords = 3;
1327 num_offsets = 3;
1328 num_derivs = 3;
1329 break;
1330 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1331 num_coords = 4;
1332 num_offsets = 2;
1333 num_derivs = 2;
1334 break;
1335 case TGSI_TEXTURE_SHADOWCUBE:
1336 num_coords = 4;
1337 num_offsets = 2;
1338 num_derivs = 3;
1339 break;
1340 default:
1341 assert(0);
1342 return;
1343 }
1344
1345 /* Note lod and especially projected are illegal in a LOT of cases */
1346 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1347 assert(num_coords < 4);
1348 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1349 explicit_lod = NULL;
1350 }
1351 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1352 assert(num_coords < 4);
1353 lod_bias = NULL;
1354 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1355 }
1356 else {
1357 lod_bias = NULL;
1358 explicit_lod = NULL;
1359 }
1360
1361 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1362 assert(num_coords < 4);
1363 oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1364 oow = lp_build_rcp(&bld->bld_base.base, oow);
1365 }
1366
1367 for (i = 0; i < num_coords; i++) {
1368 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1369 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1370 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1371 }
1372 for (i = num_coords; i < 4; i++) {
1373 coords[i] = bld->bld_base.base.undef;
1374 }
1375
1376 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1377 unsigned dim;
1378 for (dim = 0; dim < num_derivs; ++dim) {
1379 derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
1380 derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
1381 }
1382 deriv_ptr = &derivs;
1383 unit = inst->Src[3].Register.Index;
1384 } else {
1385 unit = inst->Src[1].Register.Index;
1386 }
1387
1388 /* some advanced gather instructions (txgo) would require 4 offsets */
1389 if (inst->Texture.NumOffsets == 1) {
1390 unsigned dim;
1391 for (dim = 0; dim < num_offsets; dim++) {
1392 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1393 }
1394 }
1395
1396 bld->sampler->emit_fetch_texel(bld->sampler,
1397 bld->bld_base.base.gallivm,
1398 bld->bld_base.base.type,
1399 FALSE,
1400 unit, unit,
1401 coords,
1402 offsets,
1403 deriv_ptr,
1404 lod_bias, explicit_lod,
1405 texel);
1406 }
1407
1408 static void
1409 emit_sample(struct lp_build_tgsi_soa_context *bld,
1410 const struct tgsi_full_instruction *inst,
1411 enum lp_build_tex_modifier modifier,
1412 boolean compare,
1413 LLVMValueRef *texel)
1414 {
1415 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1416 unsigned texture_unit, sampler_unit;
1417 LLVMValueRef lod_bias, explicit_lod;
1418 LLVMValueRef coords[4];
1419 LLVMValueRef offsets[3] = { NULL };
1420 struct lp_derivatives derivs;
1421 struct lp_derivatives *deriv_ptr = NULL;
1422 unsigned num_coords, num_offsets, num_derivs;
1423 unsigned i;
1424
1425 if (!bld->sampler) {
1426 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1427 for (i = 0; i < 4; i++) {
1428 texel[i] = bld->bld_base.base.undef;
1429 }
1430 return;
1431 }
1432
1433 /*
1434 * unlike old-style tex opcodes the texture/sampler indices
1435 * always come from src1 and src2 respectively.
1436 */
1437 texture_unit = inst->Src[1].Register.Index;
1438 sampler_unit = inst->Src[2].Register.Index;
1439
1440 /*
1441 * Note inst->Texture.Texture will contain the number of offsets,
1442 * however the target information is NOT there and comes from the
1443 * declared sampler views instead.
1444 */
1445 switch (bld->sv[texture_unit].Resource) {
1446 case TGSI_TEXTURE_1D:
1447 num_coords = 1;
1448 num_offsets = 1;
1449 num_derivs = 1;
1450 break;
1451 case TGSI_TEXTURE_1D_ARRAY:
1452 num_coords = 2;
1453 num_offsets = 1;
1454 num_derivs = 1;
1455 break;
1456 case TGSI_TEXTURE_2D:
1457 case TGSI_TEXTURE_RECT:
1458 num_coords = 2;
1459 num_offsets = 2;
1460 num_derivs = 2;
1461 break;
1462 case TGSI_TEXTURE_2D_ARRAY:
1463 num_coords = 3;
1464 num_offsets = 2;
1465 num_derivs = 2;
1466 break;
1467 case TGSI_TEXTURE_CUBE:
1468 num_coords = 3;
1469 num_offsets = 2;
1470 num_derivs = 3;
1471 break;
1472 case TGSI_TEXTURE_3D:
1473 num_coords = 3;
1474 num_offsets = 3;
1475 num_derivs = 3;
1476 break;
1477 case TGSI_TEXTURE_CUBE_ARRAY:
1478 num_coords = 4;
1479 num_offsets = 2;
1480 num_derivs = 3;
1481 break;
1482 default:
1483 assert(0);
1484 return;
1485 }
1486
1493
1494 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1495 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1496 explicit_lod = NULL;
1497 }
1498 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1499 lod_bias = NULL;
1500 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1501 }
1502 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
1503 lod_bias = NULL;
1504 /* XXX might be better to explicitly pass the level zero information */
1505 explicit_lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
1506 }
1507 else {
1508 lod_bias = NULL;
1509 explicit_lod = NULL;
1510 }
1511
1512 for (i = 0; i < num_coords; i++) {
1513 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1514 }
1515 for (i = num_coords; i < 4; i++) {
1516 coords[i] = bld->bld_base.base.undef;
1517 }
1518 /*
1519 * XXX: whack shadow comparison value into place.
1520 * Should probably fix the interface for separate value
1521 * (it will not work for cube arrays if it is part of coords).
1522 */
1523 if (compare) {
1524 unsigned c_coord = num_coords > 2 ? 3 : 2;
1525 assert(num_coords < 4);
1526 coords[c_coord] = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1527 }
1528
1529 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1530 unsigned dim;
1531 for (dim = 0; dim < num_derivs; ++dim) {
1532 derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
1533 derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
1534 }
1535 deriv_ptr = &derivs;
1536 }
1537
1538 /* some advanced gather instructions (txgo) would require 4 offsets */
1539 if (inst->Texture.NumOffsets == 1) {
1540 unsigned dim;
1541 for (dim = 0; dim < num_offsets; dim++) {
1542 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1543 }
1544 }
1545
1546 bld->sampler->emit_fetch_texel(bld->sampler,
1547 bld->bld_base.base.gallivm,
1548 bld->bld_base.base.type,
1549 FALSE,
1550 texture_unit, sampler_unit,
1551 coords,
1552 offsets,
1553 deriv_ptr,
1554 lod_bias, explicit_lod,
1555 texel);
1556 }
1557
1558 static void
1559 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
1560 const struct tgsi_full_instruction *inst,
1561 LLVMValueRef *texel,
1562 boolean is_samplei)
1563 {
1564 unsigned unit, target;
1565 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
1566 LLVMValueRef explicit_lod = NULL;
1567 LLVMValueRef coords[3];
1568 LLVMValueRef offsets[3] = { NULL };
1569 unsigned num_coords;
1570 unsigned dims;
1571 unsigned i;
1572
1573 if (!bld->sampler) {
1574 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1575 for (i = 0; i < 4; i++) {
1576 texel[i] = coord_undef;
1577 }
1578 return;
1579 }
1580
1581 unit = inst->Src[1].Register.Index;
1582
1583 if (is_samplei) {
1584 target = bld->sv[unit].Resource;
1585 }
1586 else {
1587 target = inst->Texture.Texture;
1588 }
1589
1590 switch (target) {
1591 case TGSI_TEXTURE_1D:
1592 case TGSI_TEXTURE_BUFFER:
1593 num_coords = 1;
1594 dims = 1;
1595 break;
1596 case TGSI_TEXTURE_1D_ARRAY:
1597 num_coords = 2;
1598 dims = 1;
1599 break;
1600 case TGSI_TEXTURE_2D:
1601 case TGSI_TEXTURE_RECT:
1602 num_coords = 2;
1603 dims = 2;
1604 break;
1605 case TGSI_TEXTURE_2D_ARRAY:
1606 num_coords = 3;
1607 dims = 2;
1608 break;
1609 case TGSI_TEXTURE_3D:
1610 num_coords = 3;
1611 dims = 3;
1612 break;
1613 default:
1614 assert(0);
1615 return;
1616 }
1617
1618 /* always have lod except for buffers ? */
1619 if (target != TGSI_TEXTURE_BUFFER) {
1620 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1621 }
1622
1623 for (i = 0; i < num_coords; i++) {
1624 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1625 }
1626 for (i = num_coords; i < 3; i++) {
1627 coords[i] = coord_undef;
1628 }
1629
1630 if (inst->Texture.NumOffsets == 1) {
1631 unsigned dim;
1632 for (dim = 0; dim < dims; dim++) {
1633 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1634 }
1635 }
1636
1637 bld->sampler->emit_fetch_texel(bld->sampler,
1638 bld->bld_base.base.gallivm,
1639 bld->bld_base.base.type,
1640 TRUE,
1641 unit, unit,
1642 coords,
1643 offsets,
1644 NULL,
1645 NULL, explicit_lod,
1646 texel);
1647 }
1648
1649 static void
1650 emit_size_query( struct lp_build_tgsi_soa_context *bld,
1651 const struct tgsi_full_instruction *inst,
1652 LLVMValueRef *sizes_out,
1653 boolean is_sviewinfo)
1654 {
1655 LLVMValueRef explicit_lod;
1656 unsigned has_lod;
1657 unsigned i;
1658 unsigned unit = inst->Src[1].Register.Index;
1659 unsigned target;
1660
1661 if (is_sviewinfo) {
1662 target = bld->sv[unit].Resource;
1663 }
1664 else {
1665 target = inst->Texture.Texture;
1666 }
1667 switch (target) {
1668 case TGSI_TEXTURE_BUFFER:
1669 case TGSI_TEXTURE_RECT:
1670 case TGSI_TEXTURE_SHADOWRECT:
1671 has_lod = 0;
1672 break;
1673 default:
1674 has_lod = 1;
1675 break;
1676 }
1677
1678 if (!bld->sampler) {
1679 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
1680 for (i = 0; i < 4; i++)
1681 sizes_out[i] = bld->bld_base.int_bld.undef;
1682 return;
1683 }
1684
1685 if (has_lod)
1686 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 0 );
1687 else
1688 explicit_lod = NULL;
1689
1690 bld->sampler->emit_size_query(bld->sampler,
1691 bld->bld_base.base.gallivm,
1692 bld->bld_base.int_bld.type,
1693 unit,
1694 is_sviewinfo,
1695 explicit_lod,
1696 sizes_out);
1697 }
1698
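/*
 * Heuristic: return TRUE if no control-flow or texture instruction occurs
 * within the next few instructions, so the caller can skip the early mask
 * check after a kill.
 */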
1699 static boolean
1700 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1701 int pc)
1702 {
1703 int i;
1704
1705 for (i = 0; i < 5; i++) {
1706 unsigned opcode;
1707
1708 if (pc + i >= bld->bld_base.info->num_instructions)
1709 return TRUE;
1710
1711 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
1712
1713 if (opcode == TGSI_OPCODE_END)
1714 return TRUE;
1715
1716 if (opcode == TGSI_OPCODE_TEX ||
1717 opcode == TGSI_OPCODE_TXP ||
1718 opcode == TGSI_OPCODE_TXD ||
1719 opcode == TGSI_OPCODE_TXB ||
1720 opcode == TGSI_OPCODE_TXL ||
1721 opcode == TGSI_OPCODE_TXF ||
1722 opcode == TGSI_OPCODE_TXQ ||
1723 opcode == TGSI_OPCODE_CAL ||
1724 opcode == TGSI_OPCODE_CALLNZ ||
1725 opcode == TGSI_OPCODE_IF ||
1726 opcode == TGSI_OPCODE_IFC ||
1727 opcode == TGSI_OPCODE_BGNLOOP ||
1728 opcode == TGSI_OPCODE_SWITCH)
1729 return FALSE;
1730 }
1731
1732 return TRUE;
1733 }
1734
1735
1736
1737 /**
1738 * Kill fragment if any of the src register values are negative.
1739 */
1740 static void
1741 emit_kil(
1742 struct lp_build_tgsi_soa_context *bld,
1743 const struct tgsi_full_instruction *inst,
1744 int pc)
1745 {
1746 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1747 const struct tgsi_full_src_register *reg = &inst->Src[0];
1748 LLVMValueRef terms[TGSI_NUM_CHANNELS];
1749 LLVMValueRef mask;
1750 unsigned chan_index;
1751
1752 memset(&terms, 0, sizeof terms);
1753
1754 TGSI_FOR_EACH_CHANNEL( chan_index ) {
1755 unsigned swizzle;
1756
1757 /* Unswizzle channel */
1758 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1759
1760 /* Check if the component has not been already tested. */
1761 assert(swizzle < TGSI_NUM_CHANNELS);
1762 if( !terms[swizzle] )
1763 /* TODO: change the comparison operator instead of setting the sign */
1764 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
1765 }
1766
1767 mask = NULL;
1768 TGSI_FOR_EACH_CHANNEL( chan_index ) {
1769 if(terms[chan_index]) {
1770 LLVMValueRef chan_mask;
1771
1772 /*
1773 * If term < 0 then mask = 0 else mask = ~0.
1774 */
1775 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
1776
1777 if(mask)
1778 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1779 else
1780 mask = chan_mask;
1781 }
1782 }
1783
1784 if(mask) {
1785 lp_build_mask_update(bld->mask, mask);
1786
1787 if (!near_end_of_shader(bld, pc))
1788 lp_build_mask_check(bld->mask);
1789 }
1790 }
1791
1792
1793 /**
1794 * Predicated fragment kill.
1795 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1796 * The only predication is the execution mask which will apply if
1797 * we're inside a loop or conditional.
1798 */
1799 static void
1800 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1801 int pc)
1802 {
1803 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1804 LLVMValueRef mask;
1805
1806 /* For those channels which are "alive", disable fragment shader
1807 * execution.
1808 */
1809 if (bld->exec_mask.has_mask) {
1810 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1811 }
1812 else {
1813 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
1814 mask = zero;
1815 }
1816
1817 lp_build_mask_update(bld->mask, mask);
1818
1819 if (!near_end_of_shader(bld, pc))
1820 lp_build_mask_check(bld->mask);
1821 }
1822
1823
1824 /**
1825 * Emit code which will dump the value of all the temporary registers
1826 * to stdout.
1827 */
1828 static void
1829 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1830 {
1831 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1832 LLVMBuilderRef builder = gallivm->builder;
1833 LLVMValueRef temp_ptr;
1834 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1835 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1836 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1837 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1838 int index;
1839 int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
1840
1841 for (index = 0; index < n; index++) {
1842 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1843 LLVMValueRef v[4][4], res;
1844 int chan;
1845
1846 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1847
1848 for (chan = 0; chan < 4; chan++) {
1849 temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
1850 res = LLVMBuildLoad(builder, temp_ptr, "");
1851 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1852 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1853 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1854 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1855 }
1856
1857 lp_build_printf(gallivm, " X: %f %f %f %f\n",
1858 v[0][0], v[0][1], v[0][2], v[0][3]);
1859 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
1860 v[1][0], v[1][1], v[1][2], v[1][3]);
1861 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
1862 v[2][0], v[2][1], v[2][2], v[2][3]);
1863 lp_build_printf(gallivm, " W: %f %f %f %f\n",
1864 v[3][0], v[3][1], v[3][2], v[3][3]);
1865 }
1866 }
1867
1868
1869
1870 void
1871 lp_emit_declaration_soa(
1872 struct lp_build_tgsi_context *bld_base,
1873 const struct tgsi_full_declaration *decl)
1874 {
1875 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1876 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1877 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1878 const unsigned first = decl->Range.First;
1879 const unsigned last = decl->Range.Last;
1880 unsigned idx, i;
1881
1882 for (idx = first; idx <= last; ++idx) {
1883 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
1884 switch (decl->Declaration.File) {
1885 case TGSI_FILE_TEMPORARY:
1886 assert(idx < LP_MAX_TGSI_TEMPS);
1887 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1888 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1889 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1890 }
1891 break;
1892
1893 case TGSI_FILE_OUTPUT:
1894 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1895 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1896 bld->outputs[idx][i] = lp_build_alloca(gallivm,
1897 vec_type, "output");
1898 }
1899 break;
1900
1901 case TGSI_FILE_ADDRESS:
1902 /* ADDR registers are only allocated with an integer LLVM IR type,
1903        * as they are guaranteed to always hold integer values.
1904 * XXX: Not sure if this exception is worthwhile (or the whole idea of
1905 * an ADDR register for that matter).
1906 */
1907 assert(idx < LP_MAX_TGSI_ADDRS);
1908 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1909 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
1910 break;
1911
1912 case TGSI_FILE_PREDICATE:
1913 assert(idx < LP_MAX_TGSI_PREDS);
1914 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1915 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1916 "predicate");
1917 break;
1918
1919 case TGSI_FILE_SAMPLER_VIEW:
1920 /*
1921        * The target stored here MUST match whatever is actually bound
1922        * in the corresponding sampler views (what about the return type?).
1923 */
1924 assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
1925 bld->sv[idx] = decl->SamplerView;
1926 break;
1927
1928 default:
1929 /* don't need to declare other vars */
1930 break;
1931 }
1932 }
1933 }
1934
1935
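/**
 * Register a TGSI immediate.
 * Each scalar value is splatted into a constant vector (integer immediates
 * are additionally bitcast to the float vector type) and stored into the
 * next immediates[] slot; unused channels are filled with undef.
 */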
1936 void lp_emit_immediate_soa(
1937 struct lp_build_tgsi_context *bld_base,
1938 const struct tgsi_full_immediate *imm)
1939 {
1940 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1941 struct gallivm_state * gallivm = bld_base->base.gallivm;
1942
1943 /* simply copy the immediate values into the next immediates[] slot */
1944 unsigned i;
1945 const uint size = imm->Immediate.NrTokens - 1;
1946 assert(size <= 4);
1947 assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
1948 switch (imm->Immediate.DataType) {
1949 case TGSI_IMM_FLOAT32:
1950 for( i = 0; i < size; ++i )
1951 bld->immediates[bld->num_immediates][i] =
1952 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
1953
1954 break;
1955 case TGSI_IMM_UINT32:
1956 for( i = 0; i < size; ++i ) {
1957 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
1958 bld->immediates[bld->num_immediates][i] =
1959 LLVMConstBitCast(tmp, bld_base->base.vec_type);
1960 }
1961
1962 break;
1963 case TGSI_IMM_INT32:
1964 for( i = 0; i < size; ++i ) {
1965 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
1966 bld->immediates[bld->num_immediates][i] =
1967 LLVMConstBitCast(tmp, bld_base->base.vec_type);
1968 }
1969
1970 break;
1971 }
1972 for( i = size; i < 4; ++i )
1973 bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
1974
1975 bld->num_immediates++;
1976 }
1977
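/* DDX/DDY: fetch the screen-space derivative of the source operand via
 * emit_fetch_deriv(), requesting only the x or the y derivative respectively.
 */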
1978 static void
1979 ddx_emit(
1980 const struct lp_build_tgsi_action * action,
1981 struct lp_build_tgsi_context * bld_base,
1982 struct lp_build_emit_data * emit_data)
1983 {
1984 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1985
1986 emit_fetch_deriv(bld, emit_data->args[0], NULL,
1987 &emit_data->output[emit_data->chan], NULL);
1988 }
1989
1990 static void
1991 ddy_emit(
1992 const struct lp_build_tgsi_action * action,
1993 struct lp_build_tgsi_context * bld_base,
1994 struct lp_build_emit_data * emit_data)
1995 {
1996 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1997
1998 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
1999 &emit_data->output[emit_data->chan]);
2000 }
2001
2002 static void
2003 kilp_emit(
2004 const struct lp_build_tgsi_action * action,
2005 struct lp_build_tgsi_context * bld_base,
2006 struct lp_build_emit_data * emit_data)
2007 {
2008 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2009
2010 emit_kilp(bld, bld_base->pc - 1);
2011 }
2012
2013 static void
2014 kil_emit(
2015 const struct lp_build_tgsi_action * action,
2016 struct lp_build_tgsi_context * bld_base,
2017 struct lp_build_emit_data * emit_data)
2018 {
2019 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2020
2021 emit_kil(bld, emit_data->inst, bld_base->pc - 1);
2022 }
2023
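/*
 * Texture opcodes.  The emit callbacks below are thin wrappers which dispatch
 * to emit_tex(), emit_sample(), emit_fetch_texels() or emit_size_query() with
 * the appropriate modifier flags (LOD bias, explicit LOD, explicit
 * derivatives, projection, shadow comparison, ...).
 */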
2024 static void
2025 tex_emit(
2026 const struct lp_build_tgsi_action * action,
2027 struct lp_build_tgsi_context * bld_base,
2028 struct lp_build_emit_data * emit_data)
2029 {
2030 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2031
2032 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
2033 }
2034
2035 static void
2036 txb_emit(
2037 const struct lp_build_tgsi_action * action,
2038 struct lp_build_tgsi_context * bld_base,
2039 struct lp_build_emit_data * emit_data)
2040 {
2041 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2042
2043 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2044 emit_data->output);
2045 }
2046
2047 static void
2048 txd_emit(
2049 const struct lp_build_tgsi_action * action,
2050 struct lp_build_tgsi_context * bld_base,
2051 struct lp_build_emit_data * emit_data)
2052 {
2053 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2054
2055 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2056 emit_data->output);
2057 }
2058
2059 static void
2060 txl_emit(
2061 const struct lp_build_tgsi_action * action,
2062 struct lp_build_tgsi_context * bld_base,
2063 struct lp_build_emit_data * emit_data)
2064 {
2065 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2066
2067 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2068 emit_data->output);
2069 }
2070
2071 static void
2072 txp_emit(
2073 const struct lp_build_tgsi_action * action,
2074 struct lp_build_tgsi_context * bld_base,
2075 struct lp_build_emit_data * emit_data)
2076 {
2077 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2078
2079 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
2080 emit_data->output);
2081 }
2082
2083 static void
2084 txq_emit(
2085 const struct lp_build_tgsi_action * action,
2086 struct lp_build_tgsi_context * bld_base,
2087 struct lp_build_emit_data * emit_data)
2088 {
2089 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2090
2091 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
2092 }
2093
2094 static void
2095 txf_emit(
2096 const struct lp_build_tgsi_action * action,
2097 struct lp_build_tgsi_context * bld_base,
2098 struct lp_build_emit_data * emit_data)
2099 {
2100 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2101
2102 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
2103 }
2104
2105 static void
2106 sample_i_emit(
2107 const struct lp_build_tgsi_action * action,
2108 struct lp_build_tgsi_context * bld_base,
2109 struct lp_build_emit_data * emit_data)
2110 {
2111 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2112
2113 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
2114 }
2115
2116 static void
2117 sample_emit(
2118 const struct lp_build_tgsi_action * action,
2119 struct lp_build_tgsi_context * bld_base,
2120 struct lp_build_emit_data * emit_data)
2121 {
2122 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2123
2124 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2125 FALSE, emit_data->output);
2126 }
2127
2128 static void
2129 sample_b_emit(
2130 const struct lp_build_tgsi_action * action,
2131 struct lp_build_tgsi_context * bld_base,
2132 struct lp_build_emit_data * emit_data)
2133 {
2134 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2135
2136 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2137 FALSE, emit_data->output);
2138 }
2139
2140 static void
2141 sample_c_emit(
2142 const struct lp_build_tgsi_action * action,
2143 struct lp_build_tgsi_context * bld_base,
2144 struct lp_build_emit_data * emit_data)
2145 {
2146 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2147
2148 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2149 TRUE, emit_data->output);
2150 }
2151
2152 static void
2153 sample_c_lz_emit(
2154 const struct lp_build_tgsi_action * action,
2155 struct lp_build_tgsi_context * bld_base,
2156 struct lp_build_emit_data * emit_data)
2157 {
2158 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2159
2160 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
2161 TRUE, emit_data->output);
2162 }
2163
2164 static void
2165 sample_d_emit(
2166 const struct lp_build_tgsi_action * action,
2167 struct lp_build_tgsi_context * bld_base,
2168 struct lp_build_emit_data * emit_data)
2169 {
2170 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2171
2172 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2173 FALSE, emit_data->output);
2174 }
2175
2176 static void
2177 sample_l_emit(
2178 const struct lp_build_tgsi_action * action,
2179 struct lp_build_tgsi_context * bld_base,
2180 struct lp_build_emit_data * emit_data)
2181 {
2182 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2183
2184 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2185 FALSE, emit_data->output);
2186 }
2187
2188 static void
2189 sviewinfo_emit(
2190 const struct lp_build_tgsi_action * action,
2191 struct lp_build_tgsi_context * bld_base,
2192 struct lp_build_emit_data * emit_data)
2193 {
2194 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2195
2196 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
2197 }
2198
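/*
 * Build an integer vector which is 1 in every currently active lane (both the
 * exec mask and the overall shader mask are applied) and 0 elsewhere, suitable
 * for incrementing the per-lane GS counters.
 */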
2199 static LLVMValueRef
2200 mask_to_one_vec(struct lp_build_tgsi_context *bld_base)
2201 {
2202 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2203 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2204 LLVMValueRef one_vec = bld_base->int_bld.one;
2205 struct lp_exec_mask *exec_mask = &bld->exec_mask;
2206
2207 if (exec_mask->has_mask) {
2208 one_vec = LLVMBuildAnd(builder, one_vec, exec_mask->exec_mask, "");
2209 }
2210 one_vec = LLVMBuildAnd(builder, one_vec,
2211 lp_build_mask_value(bld->mask), "");
2212 return one_vec;
2213 }
2214
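/* Add the per-lane 0/1 mask to the counter vector stored at ptr. */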
2215 static void
2216 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
2217 LLVMValueRef ptr,
2218 LLVMValueRef mask)
2219 {
2220 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2221
2222 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2223
2224 current_vec = LLVMBuildAdd(builder, current_vec, mask, "");
2225
2226 LLVMBuildStore(builder, current_vec, ptr);
2227 }
2228
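/* Reset to zero the lanes of the counter vector at ptr whose mask bits are
 * non-zero; the other lanes keep their current value.
 */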
2229 static void
2230 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
2231 LLVMValueRef ptr,
2232 LLVMValueRef mask)
2233 {
2234 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2235
2236 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2237 LLVMValueRef full_mask = lp_build_cmp(&bld_base->uint_bld,
2238 PIPE_FUNC_NOTEQUAL,
2239 mask,
2240 bld_base->uint_bld.zero);
2241
2242 current_vec = lp_build_select(&bld_base->uint_bld,
2243 full_mask,
2244 bld_base->uint_bld.zero,
2245 current_vec);
2246
2247 LLVMBuildStore(builder, current_vec, ptr);
2248 }
2249
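/**
 * TGSI EMIT: gather the current outputs and hand them to the GS interface,
 * then bump the per-lane emitted-vertex counters for the active lanes and
 * note that an end-of-primitive is now pending.
 */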
2250 static void
2251 emit_vertex(
2252 const struct lp_build_tgsi_action * action,
2253 struct lp_build_tgsi_context * bld_base,
2254 struct lp_build_emit_data * emit_data)
2255 {
2256 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2257 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2258
2259 if (bld->gs_iface->emit_vertex) {
2260 LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
2261 LLVMValueRef total_emitted_vertices_vec =
2262 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
2263 gather_outputs(bld);
2264 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
2265 bld->outputs,
2266 total_emitted_vertices_vec);
2267 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
2268 masked_ones);
2269 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
2270 masked_ones);
2271 #if DUMP_GS_EMITS
2272 lp_build_print_value(bld->bld_base.base.gallivm, " +++ emit vertex masked ones = ",
2273 masked_ones);
2274 lp_build_print_value(bld->bld_base.base.gallivm, " +++ emit vertex emitted = ",
2275 total_emitted_vertices_vec);
2276 #endif
2277 bld->pending_end_primitive = TRUE;
2278 }
2279 }
2280
2281
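/**
 * TGSI ENDPRIM: pass the per-lane vertex and primitive counts to the GS
 * interface, bump the emitted-primitive counters for the active lanes and
 * reset the per-primitive vertex counters.
 */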
2282 static void
2283 end_primitive(
2284 const struct lp_build_tgsi_action * action,
2285 struct lp_build_tgsi_context * bld_base,
2286 struct lp_build_emit_data * emit_data)
2287 {
2288 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2289 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2290
2291 if (bld->gs_iface->end_primitive) {
2292 LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
2293 LLVMValueRef emitted_vertices_vec =
2294 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
2295 LLVMValueRef emitted_prims_vec =
2296 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
2297
2298 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
2299 emitted_vertices_vec,
2300 emitted_prims_vec);
2301
2302 #if DUMP_GS_EMITS
2303 lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim masked ones = ",
2304 masked_ones);
2305 lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim emitted verts1 = ",
2306 emitted_vertices_vec);
2307 lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim emitted prims1 = ",
2308 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, ""));
2309 #endif
2310 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
2311 masked_ones);
2312 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
2313 masked_ones);
2314 #if DUMP_GS_EMITS
2315 lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim emitted verts2 = ",
2316 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, ""));
2317 #endif
2318
2319 bld->pending_end_primitive = FALSE;
2320 }
2321 }
2322
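/*
 * Control flow opcodes (CAL/RET/BRK/BREAKC/IF/ELSE/ENDIF/LOOP/ENDLOOP/CONT,
 * BGNSUB/ENDSUB).  These simply forward to the lp_exec_mask helpers, which
 * maintain the per-lane execution mask for divergent control flow.
 */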
2323 static void
2324 cal_emit(
2325 const struct lp_build_tgsi_action * action,
2326 struct lp_build_tgsi_context * bld_base,
2327 struct lp_build_emit_data * emit_data)
2328 {
2329 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2330
2331 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
2332 &bld_base->pc);
2333 }
2334
2335 static void
2336 ret_emit(
2337 const struct lp_build_tgsi_action * action,
2338 struct lp_build_tgsi_context * bld_base,
2339 struct lp_build_emit_data * emit_data)
2340 {
2341 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2342
2343 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
2344 }
2345
2346 static void
2347 brk_emit(
2348 const struct lp_build_tgsi_action * action,
2349 struct lp_build_tgsi_context * bld_base,
2350 struct lp_build_emit_data * emit_data)
2351 {
2352 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2353
2354 lp_exec_break(&bld->exec_mask);
2355 }
2356
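/* BREAKC: the condition operand is compared against zero as an integer and
 * the resulting per-lane condition is applied as a conditional loop break.
 */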
2357 static void
2358 breakc_emit(
2359 const struct lp_build_tgsi_action * action,
2360 struct lp_build_tgsi_context * bld_base,
2361 struct lp_build_emit_data * emit_data)
2362 {
2363 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2364 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2365 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2366 LLVMValueRef unsigned_cond =
2367 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
2368 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2369 unsigned_cond,
2370 uint_bld->zero);
2371
2372 lp_exec_break_condition(&bld->exec_mask, cond);
2373 }
2374
2375 static void
2376 if_emit(
2377 const struct lp_build_tgsi_action * action,
2378 struct lp_build_tgsi_context * bld_base,
2379 struct lp_build_emit_data * emit_data)
2380 {
2381 LLVMValueRef tmp;
2382 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2383
2384 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
2385 emit_data->args[0], bld->bld_base.base.zero);
2386 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
2387 }
2388
2389 static void
2390 bgnloop_emit(
2391 const struct lp_build_tgsi_action * action,
2392 struct lp_build_tgsi_context * bld_base,
2393 struct lp_build_emit_data * emit_data)
2394 {
2395 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2396
2397 lp_exec_bgnloop(&bld->exec_mask);
2398 }
2399
2400 static void
2401 bgnsub_emit(
2402 const struct lp_build_tgsi_action * action,
2403 struct lp_build_tgsi_context * bld_base,
2404 struct lp_build_emit_data * emit_data)
2405 {
2406 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2407
2408 lp_exec_mask_bgnsub(&bld->exec_mask);
2409 }
2410
2411 static void
2412 else_emit(
2413 const struct lp_build_tgsi_action * action,
2414 struct lp_build_tgsi_context * bld_base,
2415 struct lp_build_emit_data * emit_data)
2416 {
2417 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2418
2419 lp_exec_mask_cond_invert(&bld->exec_mask);
2420 }
2421
2422 static void
2423 endif_emit(
2424 const struct lp_build_tgsi_action * action,
2425 struct lp_build_tgsi_context * bld_base,
2426 struct lp_build_emit_data * emit_data)
2427 {
2428 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2429
2430 lp_exec_mask_cond_pop(&bld->exec_mask);
2431 }
2432
2433 static void
2434 endloop_emit(
2435 const struct lp_build_tgsi_action * action,
2436 struct lp_build_tgsi_context * bld_base,
2437 struct lp_build_emit_data * emit_data)
2438 {
2439 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2440
2441 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
2442 }
2443
2444 static void
2445 endsub_emit(
2446 const struct lp_build_tgsi_action * action,
2447 struct lp_build_tgsi_context * bld_base,
2448 struct lp_build_emit_data * emit_data)
2449 {
2450 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2451
2452 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
2453 }
2454
2455 static void
2456 cont_emit(
2457 const struct lp_build_tgsi_action * action,
2458 struct lp_build_tgsi_context * bld_base,
2459 struct lp_build_emit_data * emit_data)
2460 {
2461 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2462
2463 lp_exec_continue(&bld->exec_mask);
2464 }
2465
2466 /* XXX: Refactor and move it to lp_bld_tgsi_action.c
2467 *
2468 * XXX: What do the comments about xmm registers mean? Maybe they are left over
2469  * from old code, but there is no guarantee that LLVM will use those registers
2470 * for this code.
2471 *
2472 * XXX: There should be no calls to lp_build_emit_fetch in this function. This
2473 * should be handled by the emit_data->fetch_args function. */
2474 static void
2475 nrm_emit(
2476 const struct lp_build_tgsi_action * action,
2477 struct lp_build_tgsi_context * bld_base,
2478 struct lp_build_emit_data * emit_data)
2479 {
2480 LLVMValueRef tmp0, tmp1;
2481 LLVMValueRef tmp4 = NULL;
2482 LLVMValueRef tmp5 = NULL;
2483 LLVMValueRef tmp6 = NULL;
2484 LLVMValueRef tmp7 = NULL;
2485 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2486
2487 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2488
2489 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
2490 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
2491 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
2492 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
2493
2494 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2495
2496 /* xmm4 = src.x */
2497 /* xmm0 = src.x * src.x */
2498 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
2499 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2500 tmp4 = tmp0;
2501 }
2502 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
2503
2504 /* xmm5 = src.y */
2505 /* xmm0 = xmm0 + src.y * src.y */
2506 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
2507 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2508 tmp5 = tmp1;
2509 }
2510 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2511 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2512
2513 /* xmm6 = src.z */
2514 /* xmm0 = xmm0 + src.z * src.z */
2515 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
2516 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2517 tmp6 = tmp1;
2518 }
2519 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2520 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2521
2522 if (dims == 4) {
2523 /* xmm7 = src.w */
2524 /* xmm0 = xmm0 + src.w * src.w */
2525 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
2526 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
2527 tmp7 = tmp1;
2528 }
2529 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2530 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2531 }
2532 /* xmm1 = 1 / sqrt(xmm0) */
2533 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
2534 /* dst.x = xmm1 * src.x */
2535 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2536 emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
2537 }
2538 /* dst.y = xmm1 * src.y */
2539 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2540 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
2541 }
2542
2543 /* dst.z = xmm1 * src.z */
2544 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2545 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
2546 }
2547 /* dst.w = xmm1 * src.w */
2548       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4) {
2549 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
2550 }
2551 }
2552
2553 /* dst.w = 1.0 */
2554 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
2555 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
2556 }
2557 }
2558
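/**
 * Shader prologue.  Allocates the array allocas used for indirectly addressed
 * temporaries, outputs and inputs (copying the already gathered input values
 * into the input array), and for geometry shaders allocates and zeroes the
 * emitted vertex/primitive counters.
 */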
2559 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
2560 {
2561 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2562 struct gallivm_state * gallivm = bld_base->base.gallivm;
2563
2564 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2565 LLVMValueRef array_size =
2566 lp_build_const_int32(gallivm,
2567 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
2568 bld->temps_array = lp_build_array_alloca(gallivm,
2569 bld_base->base.vec_type, array_size,
2570 "temp_array");
2571 }
2572
2573 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2574 LLVMValueRef array_size =
2575 lp_build_const_int32(gallivm,
2576 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
2577 bld->outputs_array = lp_build_array_alloca(gallivm,
2578 bld_base->base.vec_type, array_size,
2579 "output_array");
2580 }
2581
2582    /* If we have indirect addressing in inputs, we need to copy them into
2583     * our alloca array so that they can be indexed dynamically. */
2584 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
2585 unsigned index, chan;
2586 LLVMTypeRef vec_type = bld_base->base.vec_type;
2587 LLVMValueRef array_size = lp_build_const_int32(gallivm,
2588 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
2589 bld->inputs_array = lp_build_array_alloca(gallivm,
2590 vec_type, array_size,
2591 "input_array");
2592
2593 assert(bld_base->info->num_inputs
2594 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
2595
2596 for (index = 0; index < bld_base->info->num_inputs; ++index) {
2597 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2598 LLVMValueRef lindex =
2599 lp_build_const_int32(gallivm, index * 4 + chan);
2600 LLVMValueRef input_ptr =
2601 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
2602 &lindex, 1, "");
2603 LLVMValueRef value = bld->inputs[index][chan];
2604 if (value)
2605 LLVMBuildStore(gallivm->builder, value, input_ptr);
2606 }
2607 }
2608 }
2609
2610 if (bld->gs_iface) {
2611 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
2612 bld->emitted_prims_vec_ptr =
2613 lp_build_alloca(gallivm,
2614 uint_bld->vec_type,
2615 "emitted_prims_ptr");
2616 bld->emitted_vertices_vec_ptr =
2617 lp_build_alloca(gallivm,
2618 uint_bld->vec_type,
2619 "emitted_vertices_ptr");
2620 bld->total_emitted_vertices_vec_ptr =
2621 lp_build_alloca(gallivm,
2622 uint_bld->vec_type,
2623 "total_emitted_vertices_ptr");
2624
2625 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2626 bld->emitted_prims_vec_ptr);
2627 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2628 bld->emitted_vertices_vec_ptr);
2629 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2630 bld->total_emitted_vertices_vec_ptr);
2631 }
2632 }
2633
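/**
 * Shader epilogue.  For geometry shaders this flushes any pending primitive
 * and passes the final vertex/primitive counts to the GS interface; for other
 * shader types it copies the computed values into the caller-visible output
 * slots via gather_outputs().
 */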
2634 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
2635 {
2636 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2637 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2638
2639 if (0) {
2640 /* for debugging */
2641 emit_dump_temps(bld);
2642 }
2643
2644    /* If we have indirect addressing in outputs, gather_outputs() below copies
2645     * our alloca array back into the output slots specified by the caller. */
2646 if (bld->gs_iface) {
2647 LLVMValueRef total_emitted_vertices_vec;
2648 LLVMValueRef emitted_prims_vec;
2649 /* flush the accumulated vertices as a primitive */
2650 if (bld->pending_end_primitive) {
2651 end_primitive(NULL, bld_base, NULL);
2652 bld->pending_end_primitive = FALSE;
2653 }
2654 total_emitted_vertices_vec =
2655 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
2656 emitted_prims_vec =
2657 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
2658
2659 bld->gs_iface->gs_epilogue(bld->gs_iface,
2660 &bld->bld_base,
2661 total_emitted_vertices_vec,
2662 emitted_prims_vec);
2663 } else {
2664 gather_outputs(bld);
2665 }
2666 }
2667
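/**
 * Translate a TGSI token stream into LLVM IR in SoA form.
 * Sets up the build contexts, hooks up the per-file fetch/store callbacks and
 * the per-opcode actions (including the GS specific EMIT/ENDPRIM handlers and
 * input fetching when a gs_iface is supplied), initializes the execution mask
 * and then walks the tokens via lp_build_tgsi_llvm().
 */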
2668 void
2669 lp_build_tgsi_soa(struct gallivm_state *gallivm,
2670 const struct tgsi_token *tokens,
2671 struct lp_type type,
2672 struct lp_build_mask_context *mask,
2673 LLVMValueRef consts_ptr,
2674 const struct lp_bld_tgsi_system_values *system_values,
2675 const LLVMValueRef *pos,
2676 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
2677 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
2678 struct lp_build_sampler_soa *sampler,
2679 const struct tgsi_shader_info *info,
2680 const struct lp_build_tgsi_gs_iface *gs_iface)
2681 {
2682 struct lp_build_tgsi_soa_context bld;
2683
2684 struct lp_type res_type;
2685
2686 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2687 memset(&res_type, 0, sizeof res_type);
2688 res_type.width = type.width;
2689 res_type.length = type.length;
2690 res_type.sign = 1;
2691
2692 /* Setup build context */
2693 memset(&bld, 0, sizeof bld);
2694 lp_build_context_init(&bld.bld_base.base, gallivm, type);
2695 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2696 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2697 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2698 bld.mask = mask;
2699 bld.pos = pos;
2700 bld.inputs = inputs;
2701 bld.outputs = outputs;
2702 bld.consts_ptr = consts_ptr;
2703 bld.sampler = sampler;
2704 bld.bld_base.info = info;
2705 bld.indirect_files = info->indirect_files;
2706
2707 bld.bld_base.soa = TRUE;
2708 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
2709 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
2710 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
2711 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
2712 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
2713 bld.bld_base.emit_store = emit_store;
2714
2715 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
2716 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
2717
2718 bld.bld_base.emit_prologue = emit_prologue;
2719 bld.bld_base.emit_epilogue = emit_epilogue;
2720
2721 /* Set opcode actions */
2722 lp_set_default_actions_cpu(&bld.bld_base);
2723
2724 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
2725 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
2726 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
2727 bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
2728 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
2729 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
2730 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
2731 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
2732 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
2733 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
2734 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
2735 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
2736 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
2737 bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
2738 bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
2739 bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
2740 bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
2741 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
2742 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
2743 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
2744 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
2745 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
2746 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
2747 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
2748 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
2749 /* DX10 sampling ops */
2750 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
2751 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
2752 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
2753 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
2754 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
2755 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
2756 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
2757 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
2758
2759 if (gs_iface) {
2760 /* inputs are always indirect with gs */
2761 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
2762 bld.gs_iface = gs_iface;
2763 bld.pending_end_primitive = FALSE;
2764 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
2765 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
2766 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
2767 }
2768
2769 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
2770
2771 bld.system_values = *system_values;
2772
2773 lp_build_tgsi_llvm(&bld.bld_base, tokens);
2774
2775 if (0) {
2776 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2777 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2778 debug_printf("11111111111111111111111111111 \n");
2779 tgsi_dump(tokens, 0);
2780 lp_debug_dump_value(function);
2781 debug_printf("2222222222222222222222222222 \n");
2782 }
2783
2784 if (0) {
2785 LLVMModuleRef module = LLVMGetGlobalParent(
2786 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
2787 LLVMDumpModule(module);
2788
2789 }
2790 }