1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_tgsi_action.h"
51 #include "lp_bld_type.h"
52 #include "lp_bld_const.h"
53 #include "lp_bld_arit.h"
54 #include "lp_bld_bitarit.h"
55 #include "lp_bld_gather.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_logic.h"
58 #include "lp_bld_swizzle.h"
59 #include "lp_bld_flow.h"
60 #include "lp_bld_quad.h"
61 #include "lp_bld_tgsi.h"
62 #include "lp_bld_limits.h"
63 #include "lp_bld_debug.h"
64 #include "lp_bld_printf.h"
65 #include "lp_bld_sample.h"
66 #include "lp_bld_struct.h"
67
68
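/**
 * Initialize the execution mask state: all lanes active, empty
 * condition/loop/call stacks, and a loop limiter counter initialized to
 * LP_MAX_TGSI_LOOP_ITERATIONS to guard against runaway loops.
 */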
69 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
70 {
71 LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
72 LLVMBuilderRef builder = bld->gallivm->builder;
73
74 mask->bld = bld;
75 mask->has_mask = FALSE;
76 mask->ret_in_main = FALSE;
77 mask->cond_stack_size = 0;
78 mask->loop_stack_size = 0;
79 mask->call_stack_size = 0;
80
81 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
82 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
83 LLVMConstAllOnes(mask->int_vec_type);
84
85 mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
86
87 LLVMBuildStore(
88 builder,
89 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
90 mask->loop_limiter);
91 }
92
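/**
 * Recompute exec_mask from the currently active condition, loop
 * (break/continue) and return masks, and update has_mask accordingly.
 */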
93 static void lp_exec_mask_update(struct lp_exec_mask *mask)
94 {
95 LLVMBuilderRef builder = mask->bld->gallivm->builder;
96
97 if (mask->loop_stack_size) {
98 /* For loops we need to update the entire mask at runtime. */
99 LLVMValueRef tmp;
100 assert(mask->break_mask);
101 tmp = LLVMBuildAnd(builder,
102 mask->cont_mask,
103 mask->break_mask,
104 "maskcb");
105 mask->exec_mask = LLVMBuildAnd(builder,
106 mask->cond_mask,
107 tmp,
108 "maskfull");
109 } else
110 mask->exec_mask = mask->cond_mask;
111
112 if (mask->call_stack_size || mask->ret_in_main) {
113 mask->exec_mask = LLVMBuildAnd(builder,
114 mask->exec_mask,
115 mask->ret_mask,
116 "callmask");
117 }
118
119 mask->has_mask = (mask->cond_stack_size > 0 ||
120 mask->loop_stack_size > 0 ||
121 mask->call_stack_size > 0 ||
122 mask->ret_in_main);
123 }
124
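/**
 * Enter an IF: push the current condition mask and AND it with the
 * given condition value.
 */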
125 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
126 LLVMValueRef val)
127 {
128 LLVMBuilderRef builder = mask->bld->gallivm->builder;
129
130 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
131 if (mask->cond_stack_size == 0) {
132 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
133 }
134 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
135 assert(LLVMTypeOf(val) == mask->int_vec_type);
136 mask->cond_mask = LLVMBuildAnd(builder,
137 mask->cond_mask,
138 val,
139 "");
140 lp_exec_mask_update(mask);
141 }
142
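/**
 * Handle an ELSE: invert the current condition mask within the scope of
 * the enclosing condition (the mask saved on top of the stack).
 */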
143 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
144 {
145 LLVMBuilderRef builder = mask->bld->gallivm->builder;
146 LLVMValueRef prev_mask;
147 LLVMValueRef inv_mask;
148
149 assert(mask->cond_stack_size);
150 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
151 if (mask->cond_stack_size == 1) {
152 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
153 }
154
155 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
156
157 mask->cond_mask = LLVMBuildAnd(builder,
158 inv_mask,
159 prev_mask, "");
160 lp_exec_mask_update(mask);
161 }
162
163 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
164 {
165 assert(mask->cond_stack_size);
166 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
167 lp_exec_mask_update(mask);
168 }
169
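/**
 * Enter a loop (BGNLOOP): save the enclosing loop state, allocate new
 * break mask storage and start a new basic block for the loop body.
 */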
170 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
171 {
172 LLVMBuilderRef builder = mask->bld->gallivm->builder;
173
174 if (mask->loop_stack_size == 0) {
175 assert(mask->loop_block == NULL);
176 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
177 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
178 assert(mask->break_var == NULL);
179 }
180
181 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
182
183 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
184 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
185 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
186 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
187 ++mask->loop_stack_size;
188
189 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
190 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
191
192 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
193
194 LLVMBuildBr(builder, mask->loop_block);
195 LLVMPositionBuilderAtEnd(builder, mask->loop_block);
196
197 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
198
199 lp_exec_mask_update(mask);
200 }
201
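/**
 * Handle BRK: clear the break mask bits of all currently active lanes.
 */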
202 static void lp_exec_break(struct lp_exec_mask *mask)
203 {
204 LLVMBuilderRef builder = mask->bld->gallivm->builder;
205 LLVMValueRef exec_mask = LLVMBuildNot(builder,
206 mask->exec_mask,
207 "break");
208
209 mask->break_mask = LLVMBuildAnd(builder,
210 mask->break_mask,
211 exec_mask, "break_full");
212
213 lp_exec_mask_update(mask);
214 }
215
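/**
 * Handle BREAKC: clear the break mask bits of the active lanes for which
 * the given condition is true.
 */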
216 static void lp_exec_break_condition(struct lp_exec_mask *mask,
217 LLVMValueRef cond)
218 {
219 LLVMBuilderRef builder = mask->bld->gallivm->builder;
220 LLVMValueRef cond_mask = LLVMBuildAnd(builder,
221 mask->exec_mask,
222 cond, "cond_mask");
223 cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
224
225 mask->break_mask = LLVMBuildAnd(builder,
226 mask->break_mask,
227 cond_mask, "breakc_full");
228
229 lp_exec_mask_update(mask);
230 }
231
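/**
 * Handle CONT: clear the continue mask bits of all currently active lanes.
 */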
232 static void lp_exec_continue(struct lp_exec_mask *mask)
233 {
234 LLVMBuilderRef builder = mask->bld->gallivm->builder;
235 LLVMValueRef exec_mask = LLVMBuildNot(builder,
236 mask->exec_mask,
237 "");
238
239 mask->cont_mask = LLVMBuildAnd(builder,
240 mask->cont_mask,
241 exec_mask, "");
242
243 lp_exec_mask_update(mask);
244 }
245
246
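/**
 * Handle ENDLOOP: restore the continue mask, save the break mask for the
 * next iteration, decrement the loop limiter, and branch back to the loop
 * header while any lane is still active and the limiter has not expired;
 * finally pop the enclosing loop state.
 */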
247 static void lp_exec_endloop(struct gallivm_state *gallivm,
248 struct lp_exec_mask *mask)
249 {
250 LLVMBuilderRef builder = mask->bld->gallivm->builder;
251 LLVMBasicBlockRef endloop;
252 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
253 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
254 mask->bld->type.width *
255 mask->bld->type.length);
256 LLVMValueRef i1cond, i2cond, icond, limiter;
257
258 assert(mask->break_mask);
259
260 /*
261 * Restore the cont_mask, but don't pop
262 */
263 assert(mask->loop_stack_size);
264 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
265 lp_exec_mask_update(mask);
266
267 /*
268 * Unlike the continue mask, the break_mask must be preserved across loop
269 * iterations
270 */
271 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
272
273 /* Decrement the loop limiter */
274 limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
275
276 limiter = LLVMBuildSub(
277 builder,
278 limiter,
279 LLVMConstInt(int_type, 1, false),
280 "");
281
282 LLVMBuildStore(builder, limiter, mask->loop_limiter);
283
284 /* i1cond = (mask != 0) */
285 i1cond = LLVMBuildICmp(
286 builder,
287 LLVMIntNE,
288 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
289 LLVMConstNull(reg_type), "i1cond");
290
291 /* i2cond = (looplimiter > 0) */
292 i2cond = LLVMBuildICmp(
293 builder,
294 LLVMIntSGT,
295 limiter,
296 LLVMConstNull(int_type), "i2cond");
297
298 /* if( i1cond && i2cond ) */
299 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
300
301 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
302
303 LLVMBuildCondBr(builder,
304 icond, mask->loop_block, endloop);
305
306 LLVMPositionBuilderAtEnd(builder, endloop);
307
308 assert(mask->loop_stack_size);
309 --mask->loop_stack_size;
310 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
311 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
312 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
313 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
314
315 lp_exec_mask_update(mask);
316 }
317
318 /* Store val into the address pointed to by dst.
319 * mask->exec_mask is used to figure out which channels of val
320 * should be stored into the address
321 * (0 means don't store this channel, 1 means do store).
322 */
323 static void lp_exec_mask_store(struct lp_exec_mask *mask,
324 struct lp_build_context *bld_store,
325 LLVMValueRef pred,
326 LLVMValueRef val,
327 LLVMValueRef dst)
328 {
329 LLVMBuilderRef builder = mask->bld->gallivm->builder;
330
331 /* Mix the predicate and execution mask */
332 if (mask->has_mask) {
333 if (pred) {
334 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
335 } else {
336 pred = mask->exec_mask;
337 }
338 }
339
340 if (pred) {
341 LLVMValueRef real_val, dst_val;
342
343 dst_val = LLVMBuildLoad(builder, dst, "");
344 real_val = lp_build_select(bld_store,
345 pred,
346 val, dst_val);
347
348 LLVMBuildStore(builder, real_val, dst);
349 } else
350 LLVMBuildStore(builder, val, dst);
351 }
352
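/**
 * Handle CAL: push the return address and current return mask, then jump
 * to the called subroutine by updating the program counter.
 */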
353 static void lp_exec_mask_call(struct lp_exec_mask *mask,
354 int func,
355 int *pc)
356 {
357 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
358 mask->call_stack[mask->call_stack_size].pc = *pc;
359 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
360 mask->call_stack_size++;
361 *pc = func;
362 }
363
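/**
 * Handle RET: when returning from main with no active masks simply stop,
 * otherwise clear the return mask bits of the currently active lanes.
 */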
364 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
365 {
366 LLVMBuilderRef builder = mask->bld->gallivm->builder;
367 LLVMValueRef exec_mask;
368
369 if (mask->cond_stack_size == 0 &&
370 mask->loop_stack_size == 0 &&
371 mask->call_stack_size == 0) {
372 /* returning from main() */
373 *pc = -1;
374 return;
375 }
376
377 if (mask->call_stack_size == 0) {
378 /*
379 * This requires special handling since we need to ensure
380 * we don't drop the mask even if we have no call stack
381 * (e.g. after a ret in a if clause after the endif)
382 */
383 mask->ret_in_main = TRUE;
384 }
385
386 exec_mask = LLVMBuildNot(builder,
387 mask->exec_mask,
388 "ret");
389
390 mask->ret_mask = LLVMBuildAnd(builder,
391 mask->ret_mask,
392 exec_mask, "ret_full");
393
394 lp_exec_mask_update(mask);
395 }
396
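/**
 * Handle BGNSUB: currently a no-op.
 */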
397 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
398 {
399 }
400
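/**
 * Handle ENDSUB: pop the call stack, restoring the caller's program
 * counter and return mask.
 */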
401 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
402 {
403 assert(mask->call_stack_size);
404 mask->call_stack_size--;
405 *pc = mask->call_stack[mask->call_stack_size].pc;
406 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
407 lp_exec_mask_update(mask);
408 }
409
410
411 /**
412 * Return pointer to a temporary register channel (src or dest).
413 * Note that indirect addressing cannot be handled here.
414 * \param index which temporary register
415 * \param chan which channel of the temp register.
416 */
417 LLVMValueRef
418 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
419 unsigned index,
420 unsigned chan)
421 {
422 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
423 assert(chan < 4);
424 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
425 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
426 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
427 }
428 else {
429 return bld->temps[index][chan];
430 }
431 }
432
433 /**
434 * Return pointer to an output register channel (src or dest).
435 * Note that indirect addressing cannot be handled here.
436 * \param index which output register
437 * \param chan which channel of the output register.
438 */
439 LLVMValueRef
440 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
441 unsigned index,
442 unsigned chan)
443 {
444 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
445 assert(chan < 4);
446 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
447 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
448 index * 4 + chan);
449 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
450 }
451 else {
452 return bld->outputs[index][chan];
453 }
454 }
455
456 /*
457 * If we have indirect addressing in outputs, copy our alloca array
458 * to the output slots specified by the caller to make sure
459 * our outputs are delivered consistently via the same interface.
460 */
461 static void
462 gather_outputs(struct lp_build_tgsi_soa_context * bld)
463 {
464 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
465 unsigned index, chan;
466 assert(bld->bld_base.info->num_outputs <=
467 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
468 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
469 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
470 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
471 }
472 }
473 }
474 }
475
476 /**
477 * Gather vector.
478 * XXX the lp_build_gather() function should be capable of doing this
479 * with a little work.
480 */
481 static LLVMValueRef
482 build_gather(struct lp_build_context *bld,
483 LLVMValueRef base_ptr,
484 LLVMValueRef indexes)
485 {
486 LLVMBuilderRef builder = bld->gallivm->builder;
487 LLVMValueRef res = bld->undef;
488 unsigned i;
489
490 /*
491 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
492 */
493 for (i = 0; i < bld->type.length; i++) {
494 LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
495 LLVMValueRef index = LLVMBuildExtractElement(builder,
496 indexes, ii, "");
497 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
498 &index, 1, "gather_ptr");
499 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
500
501 res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
502 }
503
504 return res;
505 }
506
507
508 /**
509 * Scatter/store vector.
510 */
511 static void
512 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
513 LLVMValueRef base_ptr,
514 LLVMValueRef indexes,
515 LLVMValueRef values,
516 struct lp_exec_mask *mask,
517 LLVMValueRef pred)
518 {
519 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
520 LLVMBuilderRef builder = gallivm->builder;
521 unsigned i;
522
523 /* Mix the predicate and execution mask */
524 if (mask->has_mask) {
525 if (pred) {
526 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
527 }
528 else {
529 pred = mask->exec_mask;
530 }
531 }
532
533 /*
534 * Loop over elements of index_vec, store scalar value.
535 */
536 for (i = 0; i < bld->bld_base.base.type.length; i++) {
537 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
538 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
539 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
540 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
541 LLVMValueRef scalar_pred = pred ?
542 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
543
544 if (0)
545 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
546 ii, val, index, scalar_ptr);
547
548 if (scalar_pred) {
549 LLVMValueRef real_val, dst_val;
550 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
551 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
552 LLVMBuildStore(builder, real_val, scalar_ptr);
553 }
554 else {
555 LLVMBuildStore(builder, val, scalar_ptr);
556 }
557 }
558 }
559
560
561 /**
562 * Read the current value of the ADDR register, convert the floats to
563 * ints, add the base index and return the vector of offsets.
564 * The offsets will be used to index into the constant buffer or
565 * temporary register file.
566 */
567 static LLVMValueRef
568 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
569 unsigned reg_file, unsigned reg_index,
570 const struct tgsi_ind_register *indirect_reg)
571 {
572 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
573 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
574 /* use the component selected by the indirect register's swizzle */
575 unsigned swizzle = indirect_reg->Swizzle;
576 LLVMValueRef base;
577 LLVMValueRef rel;
578 LLVMValueRef max_index;
579 LLVMValueRef index;
580
581 assert(bld->indirect_files & (1 << reg_file));
582
583 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
584
585 assert(swizzle < 4);
586 switch (indirect_reg->File) {
587 case TGSI_FILE_ADDRESS:
588 rel = LLVMBuildLoad(builder,
589 bld->addr[indirect_reg->Index][swizzle],
590 "load addr reg");
591 /* ADDR LLVM values already have LLVM integer type. */
592 break;
593 case TGSI_FILE_TEMPORARY:
594 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
595 rel = LLVMBuildLoad(builder, rel, "load temp reg");
596 /* TEMP LLVM values always have LLVM float type, but for indirection, the
597 * value actually stored is expected to be an integer */
598 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
599 break;
600 default:
601 assert(0);
602 rel = uint_bld->zero;
603 }
604
605 index = lp_build_add(uint_bld, base, rel);
606
607 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
608 uint_bld->type,
609 bld->bld_base.info->file_max[reg_file]);
610
611 assert(!uint_bld->type.sign);
612 index = lp_build_min(uint_bld, index, max_index);
613
614 return index;
615 }
616
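/**
 * Map a TGSI value type to the build context used for fetching values of
 * that type.
 */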
617 static struct lp_build_context *
618 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
619 enum tgsi_opcode_type stype)
620 {
621 struct lp_build_context *bld_fetch;
622
623 switch (stype) {
624 case TGSI_TYPE_FLOAT:
625 case TGSI_TYPE_UNTYPED:
626 bld_fetch = &bld_base->base;
627 break;
628 case TGSI_TYPE_UNSIGNED:
629 bld_fetch = &bld_base->uint_bld;
630 break;
631 case TGSI_TYPE_SIGNED:
632 bld_fetch = &bld_base->int_bld;
633 break;
634 case TGSI_TYPE_VOID:
635 case TGSI_TYPE_DOUBLE:
636 default:
637 assert(0);
638 bld_fetch = NULL;
639 break;
640 }
641 return bld_fetch;
642 }
643
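/**
 * Fetch a constant register channel: gather per-lane values when indirect
 * addressing is used, otherwise load a single scalar and broadcast it.
 */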
644 static LLVMValueRef
645 emit_fetch_constant(
646 struct lp_build_tgsi_context * bld_base,
647 const struct tgsi_full_src_register * reg,
648 enum tgsi_opcode_type stype,
649 unsigned swizzle)
650 {
651 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
652 struct gallivm_state *gallivm = bld_base->base.gallivm;
653 LLVMBuilderRef builder = gallivm->builder;
654 struct lp_build_context *uint_bld = &bld_base->uint_bld;
655 LLVMValueRef indirect_index = NULL;
656 unsigned dimension = 0;
657 LLVMValueRef dimension_index;
658 LLVMValueRef consts_ptr;
659 LLVMValueRef res;
660
661 /* XXX: Handle fetching xyzw components as a vector */
662 assert(swizzle != ~0);
663
664 if (reg->Register.Dimension) {
665 assert(!reg->Dimension.Indirect);
666 dimension = reg->Dimension.Index;
667 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
668 }
669
670 dimension_index = lp_build_const_int32(gallivm, dimension);
671 consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);
672
673 if (reg->Register.Indirect) {
674 indirect_index = get_indirect_index(bld,
675 reg->Register.File,
676 reg->Register.Index,
677 &reg->Indirect);
678 }
679
680 if (reg->Register.Indirect) {
681 LLVMValueRef swizzle_vec =
682 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
683 LLVMValueRef index_vec; /* index into the const buffer */
684
685 /* index_vec = indirect_index * 4 + swizzle */
686 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
687 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
688
689 /* Gather values from the constant buffer */
690 res = build_gather(&bld_base->base, consts_ptr, index_vec);
691 }
692 else {
693 LLVMValueRef index; /* index into the const buffer */
694 LLVMValueRef scalar, scalar_ptr;
695
696 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
697
698 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
699 &index, 1, "");
700 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
701 res = lp_build_broadcast_scalar(&bld_base->base, scalar);
702 }
703
704 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
705 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
706 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
707 }
708 return res;
709 }
710
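/**
 * Fetch an immediate register channel, bitcasting it to the requested
 * integer type if necessary.
 */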
711 static LLVMValueRef
712 emit_fetch_immediate(
713 struct lp_build_tgsi_context * bld_base,
714 const struct tgsi_full_src_register * reg,
715 enum tgsi_opcode_type stype,
716 unsigned swizzle)
717 {
718 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
719 LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
720 assert(res);
721
722 if (stype == TGSI_TYPE_UNSIGNED) {
723 res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
724 } else if (stype == TGSI_TYPE_SIGNED) {
725 res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
726 }
727 return res;
728 }
729
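/**
 * Fetch an input register channel, gathering from the inputs array when
 * indirect addressing is used.
 */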
730 static LLVMValueRef
731 emit_fetch_input(
732 struct lp_build_tgsi_context * bld_base,
733 const struct tgsi_full_src_register * reg,
734 enum tgsi_opcode_type stype,
735 unsigned swizzle)
736 {
737 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
738 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
739 LLVMBuilderRef builder = gallivm->builder;
740 struct lp_build_context *uint_bld = &bld_base->uint_bld;
741 LLVMValueRef indirect_index = NULL;
742 LLVMValueRef res;
743
744 if (reg->Register.Indirect) {
745 indirect_index = get_indirect_index(bld,
746 reg->Register.File,
747 reg->Register.Index,
748 &reg->Indirect);
749 }
750
751 if (reg->Register.Indirect) {
752 LLVMValueRef swizzle_vec =
753 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
754 LLVMValueRef length_vec =
755 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
756 LLVMValueRef index_vec; /* index into the inputs array */
757 LLVMValueRef inputs_array;
758 LLVMTypeRef float4_ptr_type;
759
760 /* index_vec = (indirect_index * 4 + swizzle) * length */
761 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
762 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
763 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
764
765 /* cast inputs_array pointer to float* */
766 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
767 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
768 float4_ptr_type, "");
769
770 /* Gather values from the temporary register array */
771 res = build_gather(&bld_base->base, inputs_array, index_vec);
772 } else {
773 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
774 LLVMValueRef lindex = lp_build_const_int32(gallivm,
775 reg->Register.Index * 4 + swizzle);
776 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
777 bld->inputs_array, &lindex, 1, "");
778 res = LLVMBuildLoad(builder, input_ptr, "");
779 }
780 else {
781 res = bld->inputs[reg->Register.Index][swizzle];
782 }
783 }
784
785 assert(res);
786
787 if (stype == TGSI_TYPE_UNSIGNED) {
788 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
789 } else if (stype == TGSI_TYPE_SIGNED) {
790 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
791 }
792
793 return res;
794 }
795
796
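/**
 * Fetch a geometry shader input channel through the GS interface, using
 * the (possibly indirect) vertex and attribute indices.
 */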
797 static LLVMValueRef
798 emit_fetch_gs_input(
799 struct lp_build_tgsi_context * bld_base,
800 const struct tgsi_full_src_register * reg,
801 enum tgsi_opcode_type stype,
802 unsigned swizzle)
803 {
804 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
805 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
806 LLVMBuilderRef builder = gallivm->builder;
807 LLVMValueRef attrib_index = NULL;
808 LLVMValueRef vertex_index = NULL;
809 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
810 LLVMValueRef res;
811
812 if (reg->Register.Indirect) {
813 attrib_index = get_indirect_index(bld,
814 reg->Register.File,
815 reg->Register.Index,
816 &reg->Indirect);
817 } else {
818 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
819 }
820
821 if (reg->Dimension.Indirect) {
822 vertex_index = get_indirect_index(bld,
823 reg->Register.File,
824 reg->Dimension.Index,
825 &reg->DimIndirect);
826 } else {
827 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
828 }
829
830
831 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
832 vertex_index, attrib_index,
833 swizzle_index);
834
835 assert(res);
836
837 if (stype == TGSI_TYPE_UNSIGNED) {
838 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
839 } else if (stype == TGSI_TYPE_SIGNED) {
840 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
841 }
842
843 return res;
844 }
845
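/**
 * Fetch a temporary register channel, gathering from the temps array when
 * indirect addressing is used.
 */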
846 static LLVMValueRef
847 emit_fetch_temporary(
848 struct lp_build_tgsi_context * bld_base,
849 const struct tgsi_full_src_register * reg,
850 enum tgsi_opcode_type stype,
851 unsigned swizzle)
852 {
853 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
854 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
855 LLVMBuilderRef builder = gallivm->builder;
856 struct lp_build_context *uint_bld = &bld_base->uint_bld;
857 LLVMValueRef indirect_index = NULL;
858 LLVMValueRef res;
859
860 if (reg->Register.Indirect) {
861 indirect_index = get_indirect_index(bld,
862 reg->Register.File,
863 reg->Register.Index,
864 &reg->Indirect);
865 }
866
867 if (reg->Register.Indirect) {
868 LLVMValueRef swizzle_vec =
869 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
870 LLVMValueRef length_vec =
871 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
872 bld->bld_base.base.type.length);
873 LLVMValueRef index_vec; /* index into the temps array */
874 LLVMValueRef temps_array;
875 LLVMTypeRef float4_ptr_type;
876
877 /* index_vec = (indirect_index * 4 + swizzle) * length */
878 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
879 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
880 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
881
882 /* cast temps_array pointer to float* */
883 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
884 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
885 float4_ptr_type, "");
886
887 /* Gather values from the temporary register array */
888 res = build_gather(&bld_base->base, temps_array, index_vec);
889 }
890 else {
891 LLVMValueRef temp_ptr;
892 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
893 res = LLVMBuildLoad(builder, temp_ptr, "");
894 }
895
896 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
897 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
898 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
899 }
900
901 return res;
902 }
903
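/**
 * Fetch a system value register (instance id, vertex id or primitive id),
 * bitcasting it to the requested type if that differs from the actual type.
 */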
904 static LLVMValueRef
905 emit_fetch_system_value(
906 struct lp_build_tgsi_context * bld_base,
907 const struct tgsi_full_src_register * reg,
908 enum tgsi_opcode_type stype,
909 unsigned swizzle)
910 {
911 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
912 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
913 const struct tgsi_shader_info *info = bld->bld_base.info;
914 LLVMBuilderRef builder = gallivm->builder;
915 LLVMValueRef res;
916 enum tgsi_opcode_type atype; // Actual type of the value
917
918 assert(!reg->Register.Indirect);
919
920 switch (info->system_value_semantic_name[reg->Register.Index]) {
921 case TGSI_SEMANTIC_INSTANCEID:
922 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
923 atype = TGSI_TYPE_UNSIGNED;
924 break;
925
926 case TGSI_SEMANTIC_VERTEXID:
927 res = bld->system_values.vertex_id;
928 atype = TGSI_TYPE_UNSIGNED;
929 break;
930
931 case TGSI_SEMANTIC_PRIMID:
932 res = bld->system_values.prim_id;
933 atype = TGSI_TYPE_UNSIGNED;
934 break;
935
936 default:
937 assert(!"unexpected semantic in emit_fetch_system_value");
938 res = bld_base->base.zero;
939 atype = TGSI_TYPE_FLOAT;
940 break;
941 }
942
943 if (atype != stype) {
944 if (stype == TGSI_TYPE_FLOAT) {
945 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
946 } else if (stype == TGSI_TYPE_UNSIGNED) {
947 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
948 } else if (stype == TGSI_TYPE_SIGNED) {
949 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
950 }
951 }
952
953 return res;
954 }
955
956 /**
957 * Register fetch with derivatives.
958 */
959 static void
960 emit_fetch_deriv(
961 struct lp_build_tgsi_soa_context *bld,
962 LLVMValueRef src,
963 LLVMValueRef *res,
964 LLVMValueRef *ddx,
965 LLVMValueRef *ddy)
966 {
967 if(res)
968 *res = src;
969
970 /* TODO: use interpolation coeffs for inputs */
971
972 if(ddx)
973 *ddx = lp_build_ddx(&bld->bld_base.base, src);
974
975 if(ddy)
976 *ddy = lp_build_ddy(&bld->bld_base.base, src);
977 }
978
979
980 /**
981 * Fetch the instruction's predicate register values as per-channel integer masks.
982 */
983 static void
984 emit_fetch_predicate(
985 struct lp_build_tgsi_soa_context *bld,
986 const struct tgsi_full_instruction *inst,
987 LLVMValueRef *pred)
988 {
989 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
990 unsigned index;
991 unsigned char swizzles[4];
992 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
993 LLVMValueRef value;
994 unsigned chan;
995
996 if (!inst->Instruction.Predicate) {
997 TGSI_FOR_EACH_CHANNEL( chan ) {
998 pred[chan] = NULL;
999 }
1000 return;
1001 }
1002
1003 swizzles[0] = inst->Predicate.SwizzleX;
1004 swizzles[1] = inst->Predicate.SwizzleY;
1005 swizzles[2] = inst->Predicate.SwizzleZ;
1006 swizzles[3] = inst->Predicate.SwizzleW;
1007
1008 index = inst->Predicate.Index;
1009 assert(index < LP_MAX_TGSI_PREDS);
1010
1011 TGSI_FOR_EACH_CHANNEL( chan ) {
1012 unsigned swizzle = swizzles[chan];
1013
1014 /*
1015 * Only fetch the predicate register channels that are actually listed
1016 * in the swizzles
1017 */
1018 if (!unswizzled[swizzle]) {
1019 value = LLVMBuildLoad(builder,
1020 bld->preds[index][swizzle], "");
1021
1022 /*
1023 * Convert the value to an integer mask.
1024 *
1025 * TODO: Short-circuit this comparison -- a D3D setp_xx instruction
1026 * is needlessly causing two comparisons due to storing the intermediate
1027 * result as float vector instead of an integer mask vector.
1028 */
1029 value = lp_build_compare(bld->bld_base.base.gallivm,
1030 bld->bld_base.base.type,
1031 PIPE_FUNC_NOTEQUAL,
1032 value,
1033 bld->bld_base.base.zero);
1034 if (inst->Predicate.Negate) {
1035 value = LLVMBuildNot(builder, value, "");
1036 }
1037
1038 unswizzled[swizzle] = value;
1039 } else {
1040 value = unswizzled[swizzle];
1041 }
1042
1043 pred[chan] = value;
1044 }
1045 }
1046
1047 /**
1048 * Register store.
1049 */
1050 static void
1051 emit_store_chan(
1052 struct lp_build_tgsi_context *bld_base,
1053 const struct tgsi_full_instruction *inst,
1054 unsigned index,
1055 unsigned chan_index,
1056 LLVMValueRef pred,
1057 LLVMValueRef value)
1058 {
1059 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1060 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1061 LLVMBuilderRef builder = gallivm->builder;
1062 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1063 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1064 LLVMValueRef indirect_index = NULL;
1065 struct lp_build_context *bld_store;
1066 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1067
1068 switch (dtype) {
1069 default:
1070 case TGSI_TYPE_FLOAT:
1071 case TGSI_TYPE_UNTYPED:
1072 bld_store = &bld_base->base;
1073 break;
1074 case TGSI_TYPE_UNSIGNED:
1075 bld_store = &bld_base->uint_bld;
1076 break;
1077 case TGSI_TYPE_SIGNED:
1078 bld_store = &bld_base->int_bld;
1079 break;
1080 case TGSI_TYPE_DOUBLE:
1081 case TGSI_TYPE_VOID:
1082 assert(0);
1083 bld_store = NULL;
1084 break;
1085 }
1086
1087 /* If the destination is untyped then the source can be anything,
1088 * but LLVM won't like it if the types don't match, so let's cast
1089 * to the correct destination type as expected by LLVM. */
1090 if (dtype == TGSI_TYPE_UNTYPED &&
1091 !lp_check_vec_type(bld_store->type, LLVMTypeOf(value))) {
1092 value = LLVMBuildBitCast(builder, value, bld_store->vec_type,
1093 "src_casted");
1094 }
1095
1096 switch( inst->Instruction.Saturate ) {
1097 case TGSI_SAT_NONE:
1098 break;
1099
1100 case TGSI_SAT_ZERO_ONE:
1101 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
1102 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
1103 break;
1104
1105 case TGSI_SAT_MINUS_PLUS_ONE:
1106 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
1107 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
1108 break;
1109
1110 default:
1111 assert(0);
1112 }
1113
1114 if (reg->Register.Indirect) {
1115 indirect_index = get_indirect_index(bld,
1116 reg->Register.File,
1117 reg->Register.Index,
1118 &reg->Indirect);
1119 } else {
1120 assert(reg->Register.Index <=
1121 bld->bld_base.info->file_max[reg->Register.File]);
1122 }
1123
1124 switch( reg->Register.File ) {
1125 case TGSI_FILE_OUTPUT:
1126 if (reg->Register.Indirect) {
1127 LLVMValueRef chan_vec =
1128 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1129 LLVMValueRef length_vec =
1130 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
1131 LLVMValueRef index_vec; /* indexes into the output registers */
1132 LLVMValueRef outputs_array;
1133 LLVMValueRef pixel_offsets;
1134 LLVMTypeRef float_ptr_type;
1135 int i;
1136
1137 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1138 pixel_offsets = uint_bld->undef;
1139 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1140 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1141 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1142 ii, ii, "");
1143 }
1144
1145 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1146 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1147 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1148 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1149 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1150
1151 float_ptr_type =
1152 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1153 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
1154 float_ptr_type, "");
1155
1156 /* Scatter store values into output registers */
1157 emit_mask_scatter(bld, outputs_array, index_vec, value,
1158 &bld->exec_mask, pred);
1159 }
1160 else {
1161 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1162 chan_index);
1163 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
1164 }
1165 break;
1166
1167 case TGSI_FILE_TEMPORARY:
1168 if (reg->Register.Indirect) {
1169 LLVMValueRef chan_vec =
1170 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1171 LLVMValueRef length_vec =
1172 lp_build_const_int_vec(gallivm, uint_bld->type,
1173 bld->bld_base.base.type.length);
1174 LLVMValueRef index_vec; /* indexes into the temp registers */
1175 LLVMValueRef temps_array;
1176 LLVMValueRef pixel_offsets;
1177 LLVMTypeRef float_ptr_type;
1178 int i;
1179
1180 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1181 pixel_offsets = uint_bld->undef;
1182 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1183 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1184 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1185 ii, ii, "");
1186 }
1187
1188 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1189 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1190 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1191 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1192 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1193
1194 float_ptr_type =
1195 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1196 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1197 float_ptr_type, "");
1198
1199 /* Scatter store values into temp registers */
1200 emit_mask_scatter(bld, temps_array, index_vec, value,
1201 &bld->exec_mask, pred);
1202 }
1203 else {
1204 LLVMValueRef temp_ptr;
1205
1206 switch (dtype) {
1207 case TGSI_TYPE_UNSIGNED:
1208 case TGSI_TYPE_SIGNED: {
1209 LLVMTypeRef itype = bld_base->int_bld.vec_type;
1210 LLVMTypeRef ivtype = LLVMPointerType(itype, 0);
1211 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1212 chan_index);
1213 LLVMValueRef temp_value_ptr;
1214
1215 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, "");
1216 temp_value_ptr = LLVMBuildBitCast(builder, value, itype, "");
1217 value = temp_value_ptr;
1218 break;
1219 }
1220 default:
1221 case TGSI_TYPE_FLOAT:
1222 case TGSI_TYPE_UNTYPED:
1223 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1224 chan_index);
1225 break;
1226 }
1227
1228 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr);
1229 }
1230 break;
1231
1232 case TGSI_FILE_ADDRESS:
1233 assert(dtype == TGSI_TYPE_SIGNED);
1234 assert(LLVMTypeOf(value) == bld_base->base.int_vec_type);
1235 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1236 bld->addr[reg->Register.Index][chan_index]);
1237 break;
1238
1239 case TGSI_FILE_PREDICATE:
1240 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1241 bld->preds[reg->Register.Index][chan_index]);
1242 break;
1243
1244 default:
1245 assert( 0 );
1246 }
1247 }
1248
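/**
 * Store an instruction's results to each enabled channel of its first
 * destination register, applying any predication.
 */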
1249 static void
1250 emit_store(
1251 struct lp_build_tgsi_context * bld_base,
1252 const struct tgsi_full_instruction * inst,
1253 const struct tgsi_opcode_info * info,
1254 LLVMValueRef dst[4])
1255
1256 {
1257 unsigned chan_index;
1258 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1259
1260 if(info->num_dst) {
1261 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1262
1263 emit_fetch_predicate( bld, inst, pred );
1264
1265 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1266 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1267 }
1268 }
1269 }
1270
1271 /**
1272 * High-level instruction translators.
1273 */
1274
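/**
 * Emit code for a classic TEX/TXB/TXL/TXP/TXD instruction, where texture
 * and sampler share the same unit index.
 */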
1275 static void
1276 emit_tex( struct lp_build_tgsi_soa_context *bld,
1277 const struct tgsi_full_instruction *inst,
1278 enum lp_build_tex_modifier modifier,
1279 LLVMValueRef *texel)
1280 {
1281 unsigned unit;
1282 LLVMValueRef lod_bias, explicit_lod;
1283 LLVMValueRef oow = NULL;
1284 LLVMValueRef coords[4];
1285 LLVMValueRef offsets[3] = { NULL };
1286 struct lp_derivatives derivs;
1287 struct lp_derivatives *deriv_ptr = NULL;
1288 unsigned num_coords, num_derivs, num_offsets;
1289 unsigned i;
1290
1291 if (!bld->sampler) {
1292 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1293 for (i = 0; i < 4; i++) {
1294 texel[i] = bld->bld_base.base.undef;
1295 }
1296 return;
1297 }
1298
1299 switch (inst->Texture.Texture) {
1300 case TGSI_TEXTURE_1D:
1301 num_coords = 1;
1302 num_offsets = 1;
1303 num_derivs = 1;
1304 break;
1305 case TGSI_TEXTURE_1D_ARRAY:
1306 num_coords = 2;
1307 num_offsets = 1;
1308 num_derivs = 1;
1309 break;
1310 case TGSI_TEXTURE_2D:
1311 case TGSI_TEXTURE_RECT:
1312 num_coords = 2;
1313 num_offsets = 2;
1314 num_derivs = 2;
1315 break;
1316 case TGSI_TEXTURE_SHADOW1D:
1317 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1318 num_coords = 3;
1319 num_offsets = 1;
1320 num_derivs = 1;
1321 break;
1322 case TGSI_TEXTURE_SHADOW2D:
1323 case TGSI_TEXTURE_SHADOWRECT:
1324 case TGSI_TEXTURE_2D_ARRAY:
1325 num_coords = 3;
1326 num_offsets = 2;
1327 num_derivs = 2;
1328 break;
1329 case TGSI_TEXTURE_CUBE:
1330 num_coords = 3;
1331 num_offsets = 2;
1332 num_derivs = 3;
1333 break;
1334 case TGSI_TEXTURE_3D:
1335 num_coords = 3;
1336 num_offsets = 3;
1337 num_derivs = 3;
1338 break;
1339 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1340 num_coords = 4;
1341 num_offsets = 2;
1342 num_derivs = 2;
1343 break;
1344 case TGSI_TEXTURE_SHADOWCUBE:
1345 num_coords = 4;
1346 num_offsets = 2;
1347 num_derivs = 3;
1348 break;
1349 default:
1350 assert(0);
1351 return;
1352 }
1353
1354 /* Note lod and especially projected are illegal in a LOT of cases */
1355 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1356 assert(num_coords < 4);
1357 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1358 explicit_lod = NULL;
1359 }
1360 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1361 assert(num_coords < 4);
1362 lod_bias = NULL;
1363 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1364 }
1365 else {
1366 lod_bias = NULL;
1367 explicit_lod = NULL;
1368 }
1369
1370 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1371 assert(num_coords < 4);
1372 oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1373 oow = lp_build_rcp(&bld->bld_base.base, oow);
1374 }
1375
1376 for (i = 0; i < num_coords; i++) {
1377 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1378 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1379 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1380 }
1381 for (i = num_coords; i < 4; i++) {
1382 coords[i] = bld->bld_base.base.undef;
1383 }
1384
1385 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1386 unsigned dim;
1387 for (dim = 0; dim < num_derivs; ++dim) {
1388 derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
1389 derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
1390 }
1391 deriv_ptr = &derivs;
1392 unit = inst->Src[3].Register.Index;
1393 } else {
1394 unit = inst->Src[1].Register.Index;
1395 }
1396
1397 /* some advanced gather instructions (txgo) would require 4 offsets */
1398 if (inst->Texture.NumOffsets == 1) {
1399 unsigned dim;
1400 for (dim = 0; dim < num_offsets; dim++) {
1401 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1402 }
1403 }
1404
1405 bld->sampler->emit_fetch_texel(bld->sampler,
1406 bld->bld_base.base.gallivm,
1407 bld->bld_base.base.type,
1408 FALSE,
1409 unit, unit,
1410 coords,
1411 offsets,
1412 deriv_ptr,
1413 lod_bias, explicit_lod,
1414 texel);
1415 }
1416
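/**
 * Emit code for a SAMPLE* instruction, with separate texture and sampler
 * units and the target taken from the declared sampler view.
 */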
1417 static void
1418 emit_sample(struct lp_build_tgsi_soa_context *bld,
1419 const struct tgsi_full_instruction *inst,
1420 enum lp_build_tex_modifier modifier,
1421 boolean compare,
1422 LLVMValueRef *texel)
1423 {
1424 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1425 unsigned texture_unit, sampler_unit;
1426 LLVMValueRef lod_bias, explicit_lod;
1427 LLVMValueRef coords[4];
1428 LLVMValueRef offsets[3] = { NULL };
1429 struct lp_derivatives derivs;
1430 struct lp_derivatives *deriv_ptr = NULL;
1431 unsigned num_coords, num_offsets, num_derivs;
1432 unsigned i;
1433
1434 if (!bld->sampler) {
1435 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1436 for (i = 0; i < 4; i++) {
1437 texel[i] = bld->bld_base.base.undef;
1438 }
1439 return;
1440 }
1441
1442 /*
1443 * unlike old-style tex opcodes the texture/sampler indices
1444 * always come from src1 and src2 respectively.
1445 */
1446 texture_unit = inst->Src[1].Register.Index;
1447 sampler_unit = inst->Src[2].Register.Index;
1448
1449 /*
1450 * Note inst->Texture.Texture will contain the number of offsets,
1451 * however the target information is NOT there and comes from the
1452 * declared sampler views instead.
1453 */
1454 switch (bld->sv[texture_unit].Resource) {
1455 case TGSI_TEXTURE_1D:
1456 num_coords = 1;
1457 num_offsets = 1;
1458 num_derivs = 1;
1459 break;
1460 case TGSI_TEXTURE_1D_ARRAY:
1461 num_coords = 2;
1462 num_offsets = 1;
1463 num_derivs = 1;
1464 break;
1465 case TGSI_TEXTURE_2D:
1466 case TGSI_TEXTURE_RECT:
1467 num_coords = 2;
1468 num_offsets = 2;
1469 num_derivs = 2;
1470 break;
1471 case TGSI_TEXTURE_2D_ARRAY:
1472 num_coords = 3;
1473 num_offsets = 2;
1474 num_derivs = 2;
1475 break;
1476 case TGSI_TEXTURE_CUBE:
1477 num_coords = 3;
1478 num_offsets = 2;
1479 num_derivs = 3;
1480 break;
1481 case TGSI_TEXTURE_3D:
1482 num_coords = 3;
1483 num_offsets = 3;
1484 num_derivs = 3;
1485 break;
1486 case TGSI_TEXTURE_CUBE_ARRAY:
1487 num_coords = 4;
1488 num_offsets = 2;
1489 num_derivs = 3;
1490 break;
1491 default:
1492 assert(0);
1493 return;
1494 }
1495
1503 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1504 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1505 explicit_lod = NULL;
1506 }
1507 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1508 lod_bias = NULL;
1509 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1510 }
1511 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
1512 lod_bias = NULL;
1513 /* XXX might be better to explicitly pass the level zero information */
1514 explicit_lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
1515 }
1516 else {
1517 lod_bias = NULL;
1518 explicit_lod = NULL;
1519 }
1520
1521 for (i = 0; i < num_coords; i++) {
1522 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1523 }
1524 for (i = num_coords; i < 4; i++) {
1525 coords[i] = bld->bld_base.base.undef;
1526 }
1527 /*
1528 * XXX: whack shadow comparison value into place.
1529 * Should probably fix the interface for separate value
1530 * (it will not work for cube arrays if it is part of coords).
1531 */
1532 if (compare) {
1533 unsigned c_coord = num_coords > 2 ? 3 : 2;
1534 assert(num_coords < 4);
1535 coords[c_coord] = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1536 }
1537
1538 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1539 unsigned dim;
1540 for (dim = 0; dim < num_derivs; ++dim) {
1541 derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
1542 derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
1543 }
1544 deriv_ptr = &derivs;
1545 }
1546
1547 /* some advanced gather instructions (txgo) would require 4 offsets */
1548 if (inst->Texture.NumOffsets == 1) {
1549 unsigned dim;
1550 for (dim = 0; dim < num_offsets; dim++) {
1551 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1552 }
1553 }
1554
1555 bld->sampler->emit_fetch_texel(bld->sampler,
1556 bld->bld_base.base.gallivm,
1557 bld->bld_base.base.type,
1558 FALSE,
1559 texture_unit, sampler_unit,
1560 coords,
1561 offsets,
1562 deriv_ptr,
1563 lod_bias, explicit_lod,
1564 texel);
1565 }
1566
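/**
 * Emit code for TXF / SAMPLE_I: fetch individual texels using integer
 * coordinates and an explicit lod.
 */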
1567 static void
1568 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
1569 const struct tgsi_full_instruction *inst,
1570 LLVMValueRef *texel,
1571 boolean is_samplei)
1572 {
1573 unsigned unit, target;
1574 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
1575 LLVMValueRef explicit_lod = NULL;
1576 LLVMValueRef coords[3];
1577 LLVMValueRef offsets[3] = { NULL };
1578 unsigned num_coords;
1579 unsigned dims;
1580 unsigned i;
1581
1582 if (!bld->sampler) {
1583 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1584 for (i = 0; i < 4; i++) {
1585 texel[i] = coord_undef;
1586 }
1587 return;
1588 }
1589
1590 unit = inst->Src[1].Register.Index;
1591
1592 if (is_samplei) {
1593 target = bld->sv[unit].Resource;
1594 }
1595 else {
1596 target = inst->Texture.Texture;
1597 }
1598
1599 switch (target) {
1600 case TGSI_TEXTURE_1D:
1601 case TGSI_TEXTURE_BUFFER:
1602 num_coords = 1;
1603 dims = 1;
1604 break;
1605 case TGSI_TEXTURE_1D_ARRAY:
1606 num_coords = 2;
1607 dims = 1;
1608 break;
1609 case TGSI_TEXTURE_2D:
1610 case TGSI_TEXTURE_RECT:
1611 num_coords = 2;
1612 dims = 2;
1613 break;
1614 case TGSI_TEXTURE_2D_ARRAY:
1615 num_coords = 3;
1616 dims = 2;
1617 break;
1618 case TGSI_TEXTURE_3D:
1619 num_coords = 3;
1620 dims = 3;
1621 break;
1622 default:
1623 assert(0);
1624 return;
1625 }
1626
1627 /* always have lod except for buffers ? */
1628 if (target != TGSI_TEXTURE_BUFFER) {
1629 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1630 }
1631
1632 for (i = 0; i < num_coords; i++) {
1633 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1634 }
1635 for (i = num_coords; i < 3; i++) {
1636 coords[i] = coord_undef;
1637 }
1638
1639 if (inst->Texture.NumOffsets == 1) {
1640 unsigned dim;
1641 for (dim = 0; dim < dims; dim++) {
1642 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1643 }
1644 }
1645
1646 bld->sampler->emit_fetch_texel(bld->sampler,
1647 bld->bld_base.base.gallivm,
1648 bld->bld_base.base.type,
1649 TRUE,
1650 unit, unit,
1651 coords,
1652 offsets,
1653 NULL,
1654 NULL, explicit_lod,
1655 texel);
1656 }
1657
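/**
 * Emit code for TXQ / SVIEWINFO: query the dimensions of a texture or
 * sampler view, with an explicit lod for mipmapped targets.
 */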
1658 static void
1659 emit_size_query( struct lp_build_tgsi_soa_context *bld,
1660 const struct tgsi_full_instruction *inst,
1661 LLVMValueRef *sizes_out,
1662 boolean is_sviewinfo)
1663 {
1664 LLVMValueRef explicit_lod;
1665 unsigned has_lod;
1666 unsigned i;
1667 unsigned unit = inst->Src[1].Register.Index;
1668 unsigned target;
1669
1670 if (is_sviewinfo) {
1671 target = bld->sv[unit].Resource;
1672 }
1673 else {
1674 target = inst->Texture.Texture;
1675 }
1676 switch (target) {
1677 case TGSI_TEXTURE_BUFFER:
1678 case TGSI_TEXTURE_RECT:
1679 case TGSI_TEXTURE_SHADOWRECT:
1680 has_lod = 0;
1681 break;
1682 default:
1683 has_lod = 1;
1684 break;
1685 }
1686
1687 if (!bld->sampler) {
1688 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
1689 for (i = 0; i < 4; i++)
1690 sizes_out[i] = bld->bld_base.int_bld.undef;
1691 return;
1692 }
1693
1694 if (has_lod)
1695 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 0 );
1696 else
1697 explicit_lod = NULL;
1698
1699 bld->sampler->emit_size_query(bld->sampler,
1700 bld->bld_base.base.gallivm,
1701 bld->bld_base.int_bld.type,
1702 unit,
1703 is_sviewinfo,
1704 explicit_lod,
1705 sizes_out);
1706 }
1707
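/**
 * Return TRUE if the shader ends within the next few instructions without
 * any texture or control-flow opcodes; used to skip the mask check after
 * a kill near the end of the shader.
 */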
1708 static boolean
1709 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1710 int pc)
1711 {
1712 int i;
1713
1714 for (i = 0; i < 5; i++) {
1715 unsigned opcode;
1716
1717 if (pc + i >= bld->bld_base.info->num_instructions)
1718 return TRUE;
1719
1720 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
1721
1722 if (opcode == TGSI_OPCODE_END)
1723 return TRUE;
1724
1725 if (opcode == TGSI_OPCODE_TEX ||
1726 opcode == TGSI_OPCODE_TXP ||
1727 opcode == TGSI_OPCODE_TXD ||
1728 opcode == TGSI_OPCODE_TXB ||
1729 opcode == TGSI_OPCODE_TXL ||
1730 opcode == TGSI_OPCODE_TXF ||
1731 opcode == TGSI_OPCODE_TXQ ||
1732 opcode == TGSI_OPCODE_CAL ||
1733 opcode == TGSI_OPCODE_CALLNZ ||
1734 opcode == TGSI_OPCODE_IF ||
1735 opcode == TGSI_OPCODE_BGNLOOP ||
1736 opcode == TGSI_OPCODE_SWITCH)
1737 return FALSE;
1738 }
1739
1740 return TRUE;
1741 }
1742
1743
1744
1745 /**
1746 * Kill fragment if any of the src register values are negative.
1747 */
1748 static void
1749 emit_kil(
1750 struct lp_build_tgsi_soa_context *bld,
1751 const struct tgsi_full_instruction *inst,
1752 int pc)
1753 {
1754 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1755 const struct tgsi_full_src_register *reg = &inst->Src[0];
1756 LLVMValueRef terms[TGSI_NUM_CHANNELS];
1757 LLVMValueRef mask;
1758 unsigned chan_index;
1759
1760 memset(&terms, 0, sizeof terms);
1761
1762 TGSI_FOR_EACH_CHANNEL( chan_index ) {
1763 unsigned swizzle;
1764
1765 /* Unswizzle channel */
1766 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1767
1768 /* Check if the component has not been already tested. */
1769 assert(swizzle < TGSI_NUM_CHANNELS);
1770 if( !terms[swizzle] )
1771 /* TODO: change the comparison operator instead of setting the sign */
1772 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
1773 }
1774
1775 mask = NULL;
1776 TGSI_FOR_EACH_CHANNEL( chan_index ) {
1777 if(terms[chan_index]) {
1778 LLVMValueRef chan_mask;
1779
1780 /*
1781 * If term < 0 then mask = 0 else mask = ~0.
1782 */
1783 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
1784
1785 if(mask)
1786 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1787 else
1788 mask = chan_mask;
1789 }
1790 }
1791
1792 if(mask) {
1793 lp_build_mask_update(bld->mask, mask);
1794
1795 if (!near_end_of_shader(bld, pc))
1796 lp_build_mask_check(bld->mask);
1797 }
1798 }
1799
1800
1801 /**
1802 * Predicated fragment kill.
1803 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1804 * The only predication is the execution mask which will apply if
1805 * we're inside a loop or conditional.
1806 */
1807 static void
1808 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1809 int pc)
1810 {
1811 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1812 LLVMValueRef mask;
1813
1814 /* For those channels which are "alive", disable fragment shader
1815 * execution.
1816 */
1817 if (bld->exec_mask.has_mask) {
1818 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1819 }
1820 else {
1821 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
1822 mask = zero;
1823 }
1824
1825 lp_build_mask_update(bld->mask, mask);
1826
1827 if (!near_end_of_shader(bld, pc))
1828 lp_build_mask_check(bld->mask);
1829 }
1830
1831
1832 /**
1833 * Emit code which will dump the value of all the temporary registers
1834 * to stdout.
1835 */
1836 static void
1837 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1838 {
1839 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1840 LLVMBuilderRef builder = gallivm->builder;
1841 LLVMValueRef temp_ptr;
1842 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1843 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1844 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1845 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1846 int index;
1847 int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
1848
1849 for (index = 0; index < n; index++) {
1850 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1851 LLVMValueRef v[4][4], res;
1852 int chan;
1853
1854 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1855
1856 for (chan = 0; chan < 4; chan++) {
1857 temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
1858 res = LLVMBuildLoad(builder, temp_ptr, "");
1859 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1860 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1861 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1862 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1863 }
1864
1865 lp_build_printf(gallivm, " X: %f %f %f %f\n",
1866 v[0][0], v[0][1], v[0][2], v[0][3]);
1867 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
1868 v[1][0], v[1][1], v[1][2], v[1][3]);
1869 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
1870 v[2][0], v[2][1], v[2][2], v[2][3]);
1871 lp_build_printf(gallivm, " W: %f %f %f %f\n",
1872 v[3][0], v[3][1], v[3][2], v[3][3]);
1873 }
1874 }
1875
1876
1877
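/**
 * Emit a TGSI register declaration: allocate one alloca per channel for
 * temporaries, outputs, address and predicate registers, and record
 * sampler view declarations for later use by the texture fetch code.
 * Indirectly addressed temporaries and outputs are instead backed by the
 * flat arrays set up in emit_prologue().
 */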
1878 void
1879 lp_emit_declaration_soa(
1880 struct lp_build_tgsi_context *bld_base,
1881 const struct tgsi_full_declaration *decl)
1882 {
1883 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1884 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1885 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1886 const unsigned first = decl->Range.First;
1887 const unsigned last = decl->Range.Last;
1888 unsigned idx, i;
1889
1890 for (idx = first; idx <= last; ++idx) {
1891 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
1892 switch (decl->Declaration.File) {
1893 case TGSI_FILE_TEMPORARY:
1894 assert(idx < LP_MAX_TGSI_TEMPS);
1895 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1896 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1897 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1898 }
1899 break;
1900
1901 case TGSI_FILE_OUTPUT:
1902 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1903 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1904 bld->outputs[idx][i] = lp_build_alloca(gallivm,
1905 vec_type, "output");
1906 }
1907 break;
1908
1909 case TGSI_FILE_ADDRESS:
1910 /* ADDR registers are only allocated with an integer LLVM IR type,
1911        * as they are guaranteed to always hold integer values.
1912 * XXX: Not sure if this exception is worthwhile (or the whole idea of
1913 * an ADDR register for that matter).
1914 */
1915 assert(idx < LP_MAX_TGSI_ADDRS);
1916 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1917 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
1918 break;
1919
1920 case TGSI_FILE_PREDICATE:
1921 assert(idx < LP_MAX_TGSI_PREDS);
1922 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1923 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1924 "predicate");
1925 break;
1926
1927 case TGSI_FILE_SAMPLER_VIEW:
1928 /*
1929        * The target stored here MUST match whatever is actually in the
1930        * currently set sampler views (what about the return type?).
1931 */
1932 assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
1933 bld->sv[idx] = decl->SamplerView;
1934 break;
1935
1936 default:
1937 /* don't need to declare other vars */
1938 break;
1939 }
1940 }
1941 }
1942
1943
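/**
 * Register a TGSI immediate: splat each component into a constant vector
 * of the matching type, store it in the next immediates[] slot and pad
 * any unused channels with undef.
 */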
1944 void lp_emit_immediate_soa(
1945 struct lp_build_tgsi_context *bld_base,
1946 const struct tgsi_full_immediate *imm)
1947 {
1948 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1949 struct gallivm_state * gallivm = bld_base->base.gallivm;
1950
1951 /* simply copy the immediate values into the next immediates[] slot */
1952 unsigned i;
1953 const uint size = imm->Immediate.NrTokens - 1;
1954 assert(size <= 4);
1955 assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
1956 switch (imm->Immediate.DataType) {
1957 case TGSI_IMM_FLOAT32:
1958 for( i = 0; i < size; ++i )
1959 bld->immediates[bld->num_immediates][i] =
1960 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
1961
1962 break;
1963 case TGSI_IMM_UINT32:
1964 for( i = 0; i < size; ++i ) {
1965 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
1966 bld->immediates[bld->num_immediates][i] =
1967 LLVMConstBitCast(tmp, bld_base->base.vec_type);
1968 }
1969
1970 break;
1971 case TGSI_IMM_INT32:
1972 for( i = 0; i < size; ++i ) {
1973 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
1974 bld->immediates[bld->num_immediates][i] =
1975 LLVMConstBitCast(tmp, bld_base->base.vec_type);
1976 }
1977
1978 break;
1979 }
1980 for( i = size; i < 4; ++i )
1981 bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
1982
1983 bld->num_immediates++;
1984 }
1985
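/*
 * Per-opcode emit callbacks.  Each one is a thin adapter that unwraps the
 * SoA context and forwards to the corresponding helper (emit_fetch_deriv,
 * emit_kil/emit_kilp, emit_tex, emit_sample, ...).
 */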
1986 static void
1987 ddx_emit(
1988 const struct lp_build_tgsi_action * action,
1989 struct lp_build_tgsi_context * bld_base,
1990 struct lp_build_emit_data * emit_data)
1991 {
1992 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1993
1994 emit_fetch_deriv(bld, emit_data->args[0], NULL,
1995 &emit_data->output[emit_data->chan], NULL);
1996 }
1997
1998 static void
1999 ddy_emit(
2000 const struct lp_build_tgsi_action * action,
2001 struct lp_build_tgsi_context * bld_base,
2002 struct lp_build_emit_data * emit_data)
2003 {
2004 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2005
2006 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
2007 &emit_data->output[emit_data->chan]);
2008 }
2009
2010 static void
2011 kilp_emit(
2012 const struct lp_build_tgsi_action * action,
2013 struct lp_build_tgsi_context * bld_base,
2014 struct lp_build_emit_data * emit_data)
2015 {
2016 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2017
2018 emit_kilp(bld, bld_base->pc - 1);
2019 }
2020
2021 static void
2022 kil_emit(
2023 const struct lp_build_tgsi_action * action,
2024 struct lp_build_tgsi_context * bld_base,
2025 struct lp_build_emit_data * emit_data)
2026 {
2027 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2028
2029 emit_kil(bld, emit_data->inst, bld_base->pc - 1);
2030 }
2031
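/*
 * Texture opcodes: dispatch to emit_tex(), emit_fetch_texels() or
 * emit_size_query() with the appropriate LP_BLD_TEX_MODIFIER_* flag.
 */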
2032 static void
2033 tex_emit(
2034 const struct lp_build_tgsi_action * action,
2035 struct lp_build_tgsi_context * bld_base,
2036 struct lp_build_emit_data * emit_data)
2037 {
2038 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2039
2040 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
2041 }
2042
2043 static void
2044 txb_emit(
2045 const struct lp_build_tgsi_action * action,
2046 struct lp_build_tgsi_context * bld_base,
2047 struct lp_build_emit_data * emit_data)
2048 {
2049 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2050
2051 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2052 emit_data->output);
2053 }
2054
2055 static void
2056 txd_emit(
2057 const struct lp_build_tgsi_action * action,
2058 struct lp_build_tgsi_context * bld_base,
2059 struct lp_build_emit_data * emit_data)
2060 {
2061 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2062
2063 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2064 emit_data->output);
2065 }
2066
2067 static void
2068 txl_emit(
2069 const struct lp_build_tgsi_action * action,
2070 struct lp_build_tgsi_context * bld_base,
2071 struct lp_build_emit_data * emit_data)
2072 {
2073 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2074
2075 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2076 emit_data->output);
2077 }
2078
2079 static void
2080 txp_emit(
2081 const struct lp_build_tgsi_action * action,
2082 struct lp_build_tgsi_context * bld_base,
2083 struct lp_build_emit_data * emit_data)
2084 {
2085 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2086
2087 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
2088 emit_data->output);
2089 }
2090
2091 static void
2092 txq_emit(
2093 const struct lp_build_tgsi_action * action,
2094 struct lp_build_tgsi_context * bld_base,
2095 struct lp_build_emit_data * emit_data)
2096 {
2097 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2098
2099 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
2100 }
2101
2102 static void
2103 txf_emit(
2104 const struct lp_build_tgsi_action * action,
2105 struct lp_build_tgsi_context * bld_base,
2106 struct lp_build_emit_data * emit_data)
2107 {
2108 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2109
2110 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
2111 }
2112
2113 static void
2114 sample_i_emit(
2115 const struct lp_build_tgsi_action * action,
2116 struct lp_build_tgsi_context * bld_base,
2117 struct lp_build_emit_data * emit_data)
2118 {
2119 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2120
2121 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
2122 }
2123
2124 static void
2125 sample_emit(
2126 const struct lp_build_tgsi_action * action,
2127 struct lp_build_tgsi_context * bld_base,
2128 struct lp_build_emit_data * emit_data)
2129 {
2130 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2131
2132 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2133 FALSE, emit_data->output);
2134 }
2135
2136 static void
2137 sample_b_emit(
2138 const struct lp_build_tgsi_action * action,
2139 struct lp_build_tgsi_context * bld_base,
2140 struct lp_build_emit_data * emit_data)
2141 {
2142 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2143
2144 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2145 FALSE, emit_data->output);
2146 }
2147
2148 static void
2149 sample_c_emit(
2150 const struct lp_build_tgsi_action * action,
2151 struct lp_build_tgsi_context * bld_base,
2152 struct lp_build_emit_data * emit_data)
2153 {
2154 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2155
2156 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2157 TRUE, emit_data->output);
2158 }
2159
2160 static void
2161 sample_c_lz_emit(
2162 const struct lp_build_tgsi_action * action,
2163 struct lp_build_tgsi_context * bld_base,
2164 struct lp_build_emit_data * emit_data)
2165 {
2166 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2167
2168 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
2169 TRUE, emit_data->output);
2170 }
2171
2172 static void
2173 sample_d_emit(
2174 const struct lp_build_tgsi_action * action,
2175 struct lp_build_tgsi_context * bld_base,
2176 struct lp_build_emit_data * emit_data)
2177 {
2178 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2179
2180 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2181 FALSE, emit_data->output);
2182 }
2183
2184 static void
2185 sample_l_emit(
2186 const struct lp_build_tgsi_action * action,
2187 struct lp_build_tgsi_context * bld_base,
2188 struct lp_build_emit_data * emit_data)
2189 {
2190 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2191
2192 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2193 FALSE, emit_data->output);
2194 }
2195
2196 static void
2197 sviewinfo_emit(
2198 const struct lp_build_tgsi_action * action,
2199 struct lp_build_tgsi_context * bld_base,
2200 struct lp_build_emit_data * emit_data)
2201 {
2202 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2203
2204 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
2205 }
2206
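/**
 * Build an integer vector that is 1 in every active lane and 0 in lanes
 * disabled by the execution mask or the fragment mask.  Used to advance
 * the per-lane geometry shader vertex/primitive counters.
 */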
2207 static LLVMValueRef
2208 mask_to_one_vec(struct lp_build_tgsi_context *bld_base)
2209 {
2210 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2211 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2212 LLVMValueRef one_vec = bld_base->int_bld.one;
2213 struct lp_exec_mask *exec_mask = &bld->exec_mask;
2214
2215 if (exec_mask->has_mask) {
2216 one_vec = LLVMBuildAnd(builder, one_vec, exec_mask->exec_mask, "");
2217 }
2218 one_vec = LLVMBuildAnd(builder, one_vec,
2219 lp_build_mask_value(bld->mask), "");
2220 return one_vec;
2221 }
2222
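/**
 * Add the given mask vector to the counter vector stored at ptr; with the
 * masked-ones vector from mask_to_one_vec() this increments the counter
 * only in the active lanes.
 */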
2223 static void
2224 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
2225 LLVMValueRef ptr,
2226 LLVMValueRef mask)
2227 {
2228 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2229
2230 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2231
2232 current_vec = LLVMBuildAdd(builder, current_vec, mask, "");
2233
2234 LLVMBuildStore(builder, current_vec, ptr);
2235 }
2236
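/**
 * Zero the components of the uint vector stored at ptr in lanes where
 * mask is non-zero, leaving the other lanes untouched.
 */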
2237 static void
2238 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
2239 LLVMValueRef ptr,
2240 LLVMValueRef mask)
2241 {
2242 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2243
2244 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
2245 LLVMValueRef full_mask = lp_build_cmp(&bld_base->uint_bld,
2246 PIPE_FUNC_NOTEQUAL,
2247 mask,
2248 bld_base->uint_bld.zero);
2249
2250 current_vec = lp_build_select(&bld_base->uint_bld,
2251 full_mask,
2252 bld_base->uint_bld.zero,
2253 current_vec);
2254
2255 LLVMBuildStore(builder, current_vec, ptr);
2256 }
2257
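/**
 * TGSI EMIT for geometry shaders: gather the current outputs, hand them to
 * the GS interface together with the running vertex count, and bump the
 * per-lane emitted/total-emitted vertex counters for the active lanes.
 */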
2258 static void
2259 emit_vertex(
2260 const struct lp_build_tgsi_action * action,
2261 struct lp_build_tgsi_context * bld_base,
2262 struct lp_build_emit_data * emit_data)
2263 {
2264 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2265 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2266
2267 if (bld->gs_iface->emit_vertex) {
2268 LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
2269 LLVMValueRef total_emitted_vertices_vec =
2270 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
2271 gather_outputs(bld);
2272 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
2273 bld->outputs,
2274 total_emitted_vertices_vec);
2275 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
2276 masked_ones);
2277 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
2278 masked_ones);
2279 #if DUMP_GS_EMITS
2280 lp_build_print_value(bld->bld_base.base.gallivm, " +++ emit vertex masked ones = ",
2281 masked_ones);
2282 lp_build_print_value(bld->bld_base.base.gallivm, " +++ emit vertex emitted = ",
2283 total_emitted_vertices_vec);
2284 #endif
2285 bld->pending_end_primitive = TRUE;
2286 }
2287 }
2288
2289
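/**
 * TGSI ENDPRIM for geometry shaders: notify the GS interface of the
 * finished primitive, bump the per-lane primitive counter and reset the
 * per-lane vertex counter for the active lanes.
 */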
2290 static void
2291 end_primitive(
2292 const struct lp_build_tgsi_action * action,
2293 struct lp_build_tgsi_context * bld_base,
2294 struct lp_build_emit_data * emit_data)
2295 {
2296 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2297 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2298
2299 if (bld->gs_iface->end_primitive) {
2300 LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
2301 LLVMValueRef emitted_vertices_vec =
2302 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
2303 LLVMValueRef emitted_prims_vec =
2304 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
2305
2306 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
2307 emitted_vertices_vec,
2308 emitted_prims_vec);
2309
2310 #if DUMP_GS_EMITS
2311 lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim masked ones = ",
2312 masked_ones);
2313 lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim emitted verts1 = ",
2314 emitted_vertices_vec);
2315 lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim emitted prims1 = ",
2316 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, ""));
2317 #endif
2318 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
2319 masked_ones);
2320 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
2321 masked_ones);
2322 #if DUMP_GS_EMITS
2323 lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim emitted verts2 = ",
2324 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, ""));
2325 #endif
2326
2327 bld->pending_end_primitive = FALSE;
2328 }
2329 }
2330
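/*
 * Flow control opcodes (CAL/RET/BRK/BREAKC, IF/ELSE/ENDIF, loops and
 * subroutines) simply update the execution mask state and/or the program
 * counter through the lp_exec_* helpers.
 */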
2331 static void
2332 cal_emit(
2333 const struct lp_build_tgsi_action * action,
2334 struct lp_build_tgsi_context * bld_base,
2335 struct lp_build_emit_data * emit_data)
2336 {
2337 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2338
2339 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
2340 &bld_base->pc);
2341 }
2342
2343 static void
2344 ret_emit(
2345 const struct lp_build_tgsi_action * action,
2346 struct lp_build_tgsi_context * bld_base,
2347 struct lp_build_emit_data * emit_data)
2348 {
2349 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2350
2351 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
2352 }
2353
2354 static void
2355 brk_emit(
2356 const struct lp_build_tgsi_action * action,
2357 struct lp_build_tgsi_context * bld_base,
2358 struct lp_build_emit_data * emit_data)
2359 {
2360 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2361
2362 lp_exec_break(&bld->exec_mask);
2363 }
2364
2365 static void
2366 breakc_emit(
2367 const struct lp_build_tgsi_action * action,
2368 struct lp_build_tgsi_context * bld_base,
2369 struct lp_build_emit_data * emit_data)
2370 {
2371 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2372 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2373 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2374 LLVMValueRef unsigned_cond =
2375 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
2376 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
2377 unsigned_cond,
2378 uint_bld->zero);
2379
2380 lp_exec_break_condition(&bld->exec_mask, cond);
2381 }
2382
2383 static void
2384 if_emit(
2385 const struct lp_build_tgsi_action * action,
2386 struct lp_build_tgsi_context * bld_base,
2387 struct lp_build_emit_data * emit_data)
2388 {
2389 LLVMValueRef tmp;
2390 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2391
2392 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
2393 emit_data->args[0], bld->bld_base.base.zero);
2394 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
2395 }
2396
2397 static void
2398 bgnloop_emit(
2399 const struct lp_build_tgsi_action * action,
2400 struct lp_build_tgsi_context * bld_base,
2401 struct lp_build_emit_data * emit_data)
2402 {
2403 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2404
2405 lp_exec_bgnloop(&bld->exec_mask);
2406 }
2407
2408 static void
2409 bgnsub_emit(
2410 const struct lp_build_tgsi_action * action,
2411 struct lp_build_tgsi_context * bld_base,
2412 struct lp_build_emit_data * emit_data)
2413 {
2414 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2415
2416 lp_exec_mask_bgnsub(&bld->exec_mask);
2417 }
2418
2419 static void
2420 else_emit(
2421 const struct lp_build_tgsi_action * action,
2422 struct lp_build_tgsi_context * bld_base,
2423 struct lp_build_emit_data * emit_data)
2424 {
2425 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2426
2427 lp_exec_mask_cond_invert(&bld->exec_mask);
2428 }
2429
2430 static void
2431 endif_emit(
2432 const struct lp_build_tgsi_action * action,
2433 struct lp_build_tgsi_context * bld_base,
2434 struct lp_build_emit_data * emit_data)
2435 {
2436 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2437
2438 lp_exec_mask_cond_pop(&bld->exec_mask);
2439 }
2440
2441 static void
2442 endloop_emit(
2443 const struct lp_build_tgsi_action * action,
2444 struct lp_build_tgsi_context * bld_base,
2445 struct lp_build_emit_data * emit_data)
2446 {
2447 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2448
2449 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
2450 }
2451
2452 static void
2453 endsub_emit(
2454 const struct lp_build_tgsi_action * action,
2455 struct lp_build_tgsi_context * bld_base,
2456 struct lp_build_emit_data * emit_data)
2457 {
2458 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2459
2460 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
2461 }
2462
2463 static void
2464 cont_emit(
2465 const struct lp_build_tgsi_action * action,
2466 struct lp_build_tgsi_context * bld_base,
2467 struct lp_build_emit_data * emit_data)
2468 {
2469 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2470
2471 lp_exec_continue(&bld->exec_mask);
2472 }
2473
2474 /* XXX: Refactor and move it to lp_bld_tgsi_action.c
2475 *
2476 * XXX: What do the comments about xmm registers mean? Maybe they are left over
2477  * from old code, but there is no guarantee that LLVM will use those registers
2478 * for this code.
2479 *
2480 * XXX: There should be no calls to lp_build_emit_fetch in this function. This
2481 * should be handled by the emit_data->fetch_args function. */
2482 static void
2483 nrm_emit(
2484 const struct lp_build_tgsi_action * action,
2485 struct lp_build_tgsi_context * bld_base,
2486 struct lp_build_emit_data * emit_data)
2487 {
2488 LLVMValueRef tmp0, tmp1;
2489 LLVMValueRef tmp4 = NULL;
2490 LLVMValueRef tmp5 = NULL;
2491 LLVMValueRef tmp6 = NULL;
2492 LLVMValueRef tmp7 = NULL;
2493 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2494
2495 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2496
2497 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
2498 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
2499 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
2500 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
2501
2502 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2503
2504 /* xmm4 = src.x */
2505 /* xmm0 = src.x * src.x */
2506 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
2507 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2508 tmp4 = tmp0;
2509 }
2510 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
2511
2512 /* xmm5 = src.y */
2513 /* xmm0 = xmm0 + src.y * src.y */
2514 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
2515 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2516 tmp5 = tmp1;
2517 }
2518 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2519 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2520
2521 /* xmm6 = src.z */
2522 /* xmm0 = xmm0 + src.z * src.z */
2523 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
2524 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2525 tmp6 = tmp1;
2526 }
2527 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2528 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2529
2530 if (dims == 4) {
2531 /* xmm7 = src.w */
2532 /* xmm0 = xmm0 + src.w * src.w */
2533 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
2534 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
2535 tmp7 = tmp1;
2536 }
2537 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2538 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2539 }
2540 /* xmm1 = 1 / sqrt(xmm0) */
2541 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
2542 /* dst.x = xmm1 * src.x */
2543 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2544 emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
2545 }
2546 /* dst.y = xmm1 * src.y */
2547 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2548 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
2549 }
2550
2551 /* dst.z = xmm1 * src.z */
2552 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2553 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
2554 }
2555 /* dst.w = xmm1 * src.w */
2556       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4) {
2557 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
2558 }
2559 }
2560
2561 /* dst.w = 1.0 */
2562 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
2563 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
2564 }
2565 }
2566
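/**
 * Shader prologue: allocate flat arrays for indirectly addressed
 * temporaries, outputs and (for non-GS shaders) inputs, copy the incoming
 * input values into the input array, and zero-initialize the per-lane
 * vertex/primitive counters when a geometry shader interface is present.
 */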
2567 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
2568 {
2569 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2570 struct gallivm_state * gallivm = bld_base->base.gallivm;
2571
2572 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2573 LLVMValueRef array_size =
2574 lp_build_const_int32(gallivm,
2575 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
2576 bld->temps_array = lp_build_array_alloca(gallivm,
2577 bld_base->base.vec_type, array_size,
2578 "temp_array");
2579 }
2580
2581 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2582 LLVMValueRef array_size =
2583 lp_build_const_int32(gallivm,
2584 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
2585 bld->outputs_array = lp_build_array_alloca(gallivm,
2586 bld_base->base.vec_type, array_size,
2587 "output_array");
2588 }
2589
2590    /* If we have indirect addressing in inputs, we need to copy them into
2591     * our alloca array so we can index them dynamically. */
2592 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
2593 unsigned index, chan;
2594 LLVMTypeRef vec_type = bld_base->base.vec_type;
2595 LLVMValueRef array_size = lp_build_const_int32(gallivm,
2596 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
2597 bld->inputs_array = lp_build_array_alloca(gallivm,
2598 vec_type, array_size,
2599 "input_array");
2600
2601 assert(bld_base->info->num_inputs
2602 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
2603
2604 for (index = 0; index < bld_base->info->num_inputs; ++index) {
2605 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2606 LLVMValueRef lindex =
2607 lp_build_const_int32(gallivm, index * 4 + chan);
2608 LLVMValueRef input_ptr =
2609 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
2610 &lindex, 1, "");
2611 LLVMValueRef value = bld->inputs[index][chan];
2612 if (value)
2613 LLVMBuildStore(gallivm->builder, value, input_ptr);
2614 }
2615 }
2616 }
2617
2618 if (bld->gs_iface) {
2619 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
2620 bld->emitted_prims_vec_ptr =
2621 lp_build_alloca(gallivm,
2622 uint_bld->vec_type,
2623 "emitted_prims_ptr");
2624 bld->emitted_vertices_vec_ptr =
2625 lp_build_alloca(gallivm,
2626 uint_bld->vec_type,
2627 "emitted_vertices_ptr");
2628 bld->total_emitted_vertices_vec_ptr =
2629 lp_build_alloca(gallivm,
2630 uint_bld->vec_type,
2631 "total_emitted_vertices_ptr");
2632
2633 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2634 bld->emitted_prims_vec_ptr);
2635 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2636 bld->emitted_vertices_vec_ptr);
2637 LLVMBuildStore(gallivm->builder, uint_bld->zero,
2638 bld->total_emitted_vertices_vec_ptr);
2639 }
2640 }
2641
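/**
 * Shader epilogue: for geometry shaders, flush any pending primitive and
 * pass the final vertex/primitive counts to the GS interface; otherwise
 * gather the outputs back into the caller-provided output slots.
 */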
2642 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
2643 {
2644 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2645 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2646
2647 if (0) {
2648 /* for debugging */
2649 emit_dump_temps(bld);
2650 }
2651
2652    /* If we have indirect addressing in outputs, we need to copy our alloca array
2653     * back to the output slots specified by the caller. */
2654 if (bld->gs_iface) {
2655 LLVMValueRef total_emitted_vertices_vec;
2656 LLVMValueRef emitted_prims_vec;
2657 /* flush the accumulated vertices as a primitive */
2658 if (bld->pending_end_primitive) {
2659 end_primitive(NULL, bld_base, NULL);
2660 bld->pending_end_primitive = FALSE;
2661 }
2662 total_emitted_vertices_vec =
2663 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
2664 emitted_prims_vec =
2665 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
2666
2667 bld->gs_iface->gs_epilogue(bld->gs_iface,
2668 &bld->bld_base,
2669 total_emitted_vertices_vec,
2670 emitted_prims_vec);
2671 } else {
2672 gather_outputs(bld);
2673 }
2674 }
2675
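/**
 * Main entry point: translate the given TGSI token stream into LLVM IR
 * using an SoA (one vector per channel) register layout.  Sets up the
 * build contexts, fetch/store hooks and per-opcode actions, then walks
 * the tokens via lp_build_tgsi_llvm().
 */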
2676 void
2677 lp_build_tgsi_soa(struct gallivm_state *gallivm,
2678 const struct tgsi_token *tokens,
2679 struct lp_type type,
2680 struct lp_build_mask_context *mask,
2681 LLVMValueRef consts_ptr,
2682 const struct lp_bld_tgsi_system_values *system_values,
2683 const LLVMValueRef *pos,
2684 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
2685 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
2686 struct lp_build_sampler_soa *sampler,
2687 const struct tgsi_shader_info *info,
2688 const struct lp_build_tgsi_gs_iface *gs_iface)
2689 {
2690 struct lp_build_tgsi_soa_context bld;
2691
2692 struct lp_type res_type;
2693
2694 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2695 memset(&res_type, 0, sizeof res_type);
2696 res_type.width = type.width;
2697 res_type.length = type.length;
2698 res_type.sign = 1;
2699
2700 /* Setup build context */
2701 memset(&bld, 0, sizeof bld);
2702 lp_build_context_init(&bld.bld_base.base, gallivm, type);
2703 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2704 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2705 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2706 bld.mask = mask;
2707 bld.pos = pos;
2708 bld.inputs = inputs;
2709 bld.outputs = outputs;
2710 bld.consts_ptr = consts_ptr;
2711 bld.sampler = sampler;
2712 bld.bld_base.info = info;
2713 bld.indirect_files = info->indirect_files;
2714
2715 bld.bld_base.soa = TRUE;
2716 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
2717 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
2718 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
2719 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
2720 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
2721 bld.bld_base.emit_store = emit_store;
2722
2723 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
2724 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
2725
2726 bld.bld_base.emit_prologue = emit_prologue;
2727 bld.bld_base.emit_epilogue = emit_epilogue;
2728
2729 /* Set opcode actions */
2730 lp_set_default_actions_cpu(&bld.bld_base);
2731
2732 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
2733 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
2734 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
2735 bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
2736 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
2737 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
2738 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
2739 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
2740 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
2741 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
2742 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
2743 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
2744 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
2745 bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
2746 bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
2747 bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
2748 bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
2749 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
2750 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
2751 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
2752 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
2753 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
2754 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
2755 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
2756 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
2757 /* DX10 sampling ops */
2758 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
2759 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
2760 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
2761 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
2762 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
2763 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
2764 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
2765 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
2766
2767 if (gs_iface) {
2768 /* inputs are always indirect with gs */
2769 bld.indirect_files |= (1 << TGSI_FILE_INPUT);
2770 bld.gs_iface = gs_iface;
2771 bld.pending_end_primitive = FALSE;
2772 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
2773 bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
2774 bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
2775 }
2776
2777 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
2778
2779 bld.system_values = *system_values;
2780
2781 lp_build_tgsi_llvm(&bld.bld_base, tokens);
2782
2783 if (0) {
2784 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2785 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2786 debug_printf("11111111111111111111111111111 \n");
2787 tgsi_dump(tokens, 0);
2788 lp_debug_dump_value(function);
2789 debug_printf("2222222222222222222222222222 \n");
2790 }
2791
2792 if (0) {
2793 LLVMModuleRef module = LLVMGetGlobalParent(
2794 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
2795 LLVMDumpModule(module);
2796
2797 }
2798 }