gallivm: fix up size queries for dx10 sviewinfo opcode
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_tgsi_action.h"
51 #include "lp_bld_type.h"
52 #include "lp_bld_const.h"
53 #include "lp_bld_arit.h"
54 #include "lp_bld_bitarit.h"
55 #include "lp_bld_gather.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_logic.h"
58 #include "lp_bld_swizzle.h"
59 #include "lp_bld_flow.h"
60 #include "lp_bld_quad.h"
61 #include "lp_bld_tgsi.h"
62 #include "lp_bld_limits.h"
63 #include "lp_bld_debug.h"
64 #include "lp_bld_printf.h"
65 #include "lp_bld_sample.h"
66 #include "lp_bld_struct.h"
67
68
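/**
 * Initialize the execution mask state: all masks start as all-ones
 * (every channel active) and the loop limiter counter is allocated
 * and seeded with LP_MAX_TGSI_LOOP_ITERATIONS.
 */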
69 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
70 {
71 LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
72 LLVMBuilderRef builder = bld->gallivm->builder;
73
74 mask->bld = bld;
75 mask->has_mask = FALSE;
76 mask->cond_stack_size = 0;
77 mask->loop_stack_size = 0;
78 mask->call_stack_size = 0;
79
80 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
81 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
82 LLVMConstAllOnes(mask->int_vec_type);
83
84 mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
85
86 LLVMBuildStore(
87 builder,
88 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
89 mask->loop_limiter);
90 }
91
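/**
 * Recompute exec_mask from the conditional, loop (break/continue) and
 * call (return) masks, depending on which control-flow stacks are active.
 */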
92 static void lp_exec_mask_update(struct lp_exec_mask *mask)
93 {
94 LLVMBuilderRef builder = mask->bld->gallivm->builder;
95
96 if (mask->loop_stack_size) {
97          /* For loops we need to update the entire mask at runtime */
98 LLVMValueRef tmp;
99 assert(mask->break_mask);
100 tmp = LLVMBuildAnd(builder,
101 mask->cont_mask,
102 mask->break_mask,
103 "maskcb");
104 mask->exec_mask = LLVMBuildAnd(builder,
105 mask->cond_mask,
106 tmp,
107 "maskfull");
108 } else
109 mask->exec_mask = mask->cond_mask;
110
111 if (mask->call_stack_size) {
112 mask->exec_mask = LLVMBuildAnd(builder,
113 mask->exec_mask,
114 mask->ret_mask,
115 "callmask");
116 }
117
118 mask->has_mask = (mask->cond_stack_size > 0 ||
119 mask->loop_stack_size > 0 ||
120 mask->call_stack_size > 0);
121 }
122
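/**
 * Enter a conditional: push the current cond_mask and AND the new
 * condition into it.
 */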
123 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
124 LLVMValueRef val)
125 {
126 LLVMBuilderRef builder = mask->bld->gallivm->builder;
127
128 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
129 if (mask->cond_stack_size == 0) {
130 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
131 }
132 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
133 assert(LLVMTypeOf(val) == mask->int_vec_type);
134 mask->cond_mask = LLVMBuildAnd(builder,
135 mask->cond_mask,
136 val,
137 "");
138 lp_exec_mask_update(mask);
139 }
140
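/**
 * Take the "else" branch: invert the current condition and AND it with
 * the mask that was active before the matching push.
 */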
141 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
142 {
143 LLVMBuilderRef builder = mask->bld->gallivm->builder;
144 LLVMValueRef prev_mask;
145 LLVMValueRef inv_mask;
146
147 assert(mask->cond_stack_size);
148 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
149 if (mask->cond_stack_size == 1) {
150 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
151 }
152
153 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
154
155 mask->cond_mask = LLVMBuildAnd(builder,
156 inv_mask,
157 prev_mask, "");
158 lp_exec_mask_update(mask);
159 }
160
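/**
 * Leave a conditional: restore the cond_mask saved by the matching push.
 */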
161 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
162 {
163 assert(mask->cond_stack_size);
164 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
165 lp_exec_mask_update(mask);
166 }
167
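/**
 * Begin a loop: save the enclosing loop state, set up the break mask
 * storage and start a new "bgnloop" basic block for the loop body.
 */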
168 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
169 {
170 LLVMBuilderRef builder = mask->bld->gallivm->builder;
171
172 if (mask->loop_stack_size == 0) {
173 assert(mask->loop_block == NULL);
174 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
175 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
176 assert(mask->break_var == NULL);
177 }
178
179 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
180
181 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
182 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
183 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
184 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
185 ++mask->loop_stack_size;
186
187 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
188 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
189
190 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
191
192 LLVMBuildBr(builder, mask->loop_block);
193 LLVMPositionBuilderAtEnd(builder, mask->loop_block);
194
195 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
196
197 lp_exec_mask_update(mask);
198 }
199
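/**
 * Break out of the innermost loop: clear the break_mask bits of the
 * channels that are currently active.
 */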
200 static void lp_exec_break(struct lp_exec_mask *mask)
201 {
202 LLVMBuilderRef builder = mask->bld->gallivm->builder;
203 LLVMValueRef exec_mask = LLVMBuildNot(builder,
204 mask->exec_mask,
205 "break");
206
207 mask->break_mask = LLVMBuildAnd(builder,
208 mask->break_mask,
209 exec_mask, "break_full");
210
211 lp_exec_mask_update(mask);
212 }
213
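/**
 * Continue with the next loop iteration: clear the cont_mask bits of
 * the channels that are currently active.
 */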
214 static void lp_exec_continue(struct lp_exec_mask *mask)
215 {
216 LLVMBuilderRef builder = mask->bld->gallivm->builder;
217 LLVMValueRef exec_mask = LLVMBuildNot(builder,
218 mask->exec_mask,
219 "");
220
221 mask->cont_mask = LLVMBuildAnd(builder,
222 mask->cont_mask,
223 exec_mask, "");
224
225 lp_exec_mask_update(mask);
226 }
227
228
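/**
 * End a loop: restore the continue mask, store the break mask for the
 * next iteration, and branch back to the loop header while any channel
 * is still active and the iteration limiter has not expired.
 */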
229 static void lp_exec_endloop(struct gallivm_state *gallivm,
230 struct lp_exec_mask *mask)
231 {
232 LLVMBuilderRef builder = mask->bld->gallivm->builder;
233 LLVMBasicBlockRef endloop;
234 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
235 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
236 mask->bld->type.width *
237 mask->bld->type.length);
238 LLVMValueRef i1cond, i2cond, icond, limiter;
239
240 assert(mask->break_mask);
241
242 /*
243 * Restore the cont_mask, but don't pop
244 */
245 assert(mask->loop_stack_size);
246 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
247 lp_exec_mask_update(mask);
248
249 /*
250 * Unlike the continue mask, the break_mask must be preserved across loop
251 * iterations
252 */
253 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
254
255 /* Decrement the loop limiter */
256 limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
257
258 limiter = LLVMBuildSub(
259 builder,
260 limiter,
261 LLVMConstInt(int_type, 1, false),
262 "");
263
264 LLVMBuildStore(builder, limiter, mask->loop_limiter);
265
266 /* i1cond = (mask != 0) */
267 i1cond = LLVMBuildICmp(
268 builder,
269 LLVMIntNE,
270 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
271 LLVMConstNull(reg_type), "");
272
273 /* i2cond = (looplimiter > 0) */
274 i2cond = LLVMBuildICmp(
275 builder,
276 LLVMIntSGT,
277 limiter,
278 LLVMConstNull(int_type), "");
279
280 /* if( i1cond && i2cond ) */
281 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
282
283 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
284
285 LLVMBuildCondBr(builder,
286 icond, mask->loop_block, endloop);
287
288 LLVMPositionBuilderAtEnd(builder, endloop);
289
290 assert(mask->loop_stack_size);
291 --mask->loop_stack_size;
292 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
293 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
294 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
295 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
296
297 lp_exec_mask_update(mask);
298 }
299
300 /* Stores val into the address pointed to by dst.
301 * mask->exec_mask is used to figure out which bits of val
302 * should be stored into the address
303 * (0 means don't store this bit, 1 means do store).
304 */
305 static void lp_exec_mask_store(struct lp_exec_mask *mask,
306 struct lp_build_context *bld_store,
307 LLVMValueRef pred,
308 LLVMValueRef val,
309 LLVMValueRef dst)
310 {
311 LLVMBuilderRef builder = mask->bld->gallivm->builder;
312
313 /* Mix the predicate and execution mask */
314 if (mask->has_mask) {
315 if (pred) {
316 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
317 } else {
318 pred = mask->exec_mask;
319 }
320 }
321
322 if (pred) {
323 LLVMValueRef real_val, dst_val;
324
325 dst_val = LLVMBuildLoad(builder, dst, "");
326 real_val = lp_build_select(bld_store,
327 pred,
328 val, dst_val);
329
330 LLVMBuildStore(builder, real_val, dst);
331 } else
332 LLVMBuildStore(builder, val, dst);
333 }
334
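/**
 * Subroutine call: push the return pc and ret_mask, then transfer
 * control to the callee by updating *pc.
 */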
335 static void lp_exec_mask_call(struct lp_exec_mask *mask,
336 int func,
337 int *pc)
338 {
339 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
340 mask->call_stack[mask->call_stack_size].pc = *pc;
341 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
342 mask->call_stack_size++;
343 *pc = func;
344 }
345
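/**
 * Return: from main() this ends execution (*pc = -1); inside a
 * subroutine it disables the currently active channels via ret_mask.
 */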
346 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
347 {
348 LLVMBuilderRef builder = mask->bld->gallivm->builder;
349 LLVMValueRef exec_mask;
350
351 if (mask->call_stack_size == 0) {
352 /* returning from main() */
353 *pc = -1;
354 return;
355 }
356 exec_mask = LLVMBuildNot(builder,
357 mask->exec_mask,
358 "ret");
359
360 mask->ret_mask = LLVMBuildAnd(builder,
361 mask->ret_mask,
362 exec_mask, "ret_full");
363
364 lp_exec_mask_update(mask);
365 }
366
367 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
368 {
369 }
370
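/**
 * End of subroutine: pop the call stack, restoring the return pc and
 * the caller's ret_mask.
 */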
371 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
372 {
373 assert(mask->call_stack_size);
374 mask->call_stack_size--;
375 *pc = mask->call_stack[mask->call_stack_size].pc;
376 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
377 lp_exec_mask_update(mask);
378 }
379
380
381 /**
382 * Return pointer to a temporary register channel (src or dest).
383 * Note that indirect addressing cannot be handled here.
384 * \param index which temporary register
385 * \param chan which channel of the temp register.
386 */
387 LLVMValueRef
388 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
389 unsigned index,
390 unsigned chan)
391 {
392 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
393 assert(chan < 4);
394 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
395 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
396 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
397 }
398 else {
399 return bld->temps[index][chan];
400 }
401 }
402
403 /**
404 * Return pointer to an output register channel (src or dest).
405 * Note that indirect addressing cannot be handled here.
406 * \param index which output register
407 * \param chan which channel of the output register.
408 */
409 LLVMValueRef
410 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
411 unsigned index,
412 unsigned chan)
413 {
414 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
415 assert(chan < 4);
416 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
417 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
418 index * 4 + chan);
419 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
420 }
421 else {
422 return bld->outputs[index][chan];
423 }
424 }
425
426 /**
427 * Gather vector.
428 * XXX the lp_build_gather() function should be capable of doing this
429 * with a little work.
430 */
431 static LLVMValueRef
432 build_gather(struct lp_build_context *bld,
433 LLVMValueRef base_ptr,
434 LLVMValueRef indexes)
435 {
436 LLVMBuilderRef builder = bld->gallivm->builder;
437 LLVMValueRef res = bld->undef;
438 unsigned i;
439
440 /*
441 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
442 */
443 for (i = 0; i < bld->type.length; i++) {
444 LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
445 LLVMValueRef index = LLVMBuildExtractElement(builder,
446 indexes, ii, "");
447 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
448 &index, 1, "gather_ptr");
449 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
450
451 res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
452 }
453
454 return res;
455 }
456
457
458 /**
459 * Scatter/store vector.
460 */
461 static void
462 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
463 LLVMValueRef base_ptr,
464 LLVMValueRef indexes,
465 LLVMValueRef values,
466 struct lp_exec_mask *mask,
467 LLVMValueRef pred)
468 {
469 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
470 LLVMBuilderRef builder = gallivm->builder;
471 unsigned i;
472
473 /* Mix the predicate and execution mask */
474 if (mask->has_mask) {
475 if (pred) {
476 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
477 }
478 else {
479 pred = mask->exec_mask;
480 }
481 }
482
483 /*
484 * Loop over elements of index_vec, store scalar value.
485 */
486 for (i = 0; i < bld->bld_base.base.type.length; i++) {
487 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
488 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
489 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
490 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
491 LLVMValueRef scalar_pred = pred ?
492 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
493
494 if (0)
495 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
496 ii, val, index, scalar_ptr);
497
498 if (scalar_pred) {
499 LLVMValueRef real_val, dst_val;
500 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
501 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
502 LLVMBuildStore(builder, real_val, scalar_ptr);
503 }
504 else {
505 LLVMBuildStore(builder, val, scalar_ptr);
506 }
507 }
508 }
509
510
511 /**
512 * Read the current value of the ADDR register, convert the floats to
513 * ints, add the base index and return the vector of offsets.
514 * The offsets will be used to index into the constant buffer or
515 * temporary register file.
516 */
517 static LLVMValueRef
518 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
519 unsigned reg_file, unsigned reg_index,
520 const struct tgsi_src_register *indirect_reg)
521 {
522 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
523 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
524 /* always use X component of address register */
525 unsigned swizzle = indirect_reg->SwizzleX;
526 LLVMValueRef base;
527 LLVMValueRef rel;
528 LLVMValueRef max_index;
529 LLVMValueRef index;
530
531 assert(bld->indirect_files & (1 << reg_file));
532
533 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
534
535 assert(swizzle < 4);
536 switch (indirect_reg->File) {
537 case TGSI_FILE_ADDRESS:
538 rel = LLVMBuildLoad(builder,
539 bld->addr[indirect_reg->Index][swizzle],
540 "load addr reg");
541 /* ADDR LLVM values already have LLVM integer type. */
542 break;
543 case TGSI_FILE_TEMPORARY:
544 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
545 rel = LLVMBuildLoad(builder, rel, "load temp reg");
546 /* TEMP LLVM values always have LLVM float type, but for indirection, the
547 * value actually stored is expected to be an integer */
548 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
549 break;
550 default:
551 assert(0);
552 rel = uint_bld->zero;
553 }
554
555 index = lp_build_add(uint_bld, base, rel);
556
557 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
558 uint_bld->type,
559 bld->bld_base.info->file_max[reg_file]);
560
561 assert(!uint_bld->type.sign);
562 index = lp_build_min(uint_bld, index, max_index);
563
564 return index;
565 }
566
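/**
 * Map a TGSI value type to the build context (float, unsigned or signed)
 * that should be used to fetch values of that type.
 */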
567 static struct lp_build_context *
568 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
569 enum tgsi_opcode_type stype)
570 {
571 struct lp_build_context *bld_fetch;
572
573 switch (stype) {
574 case TGSI_TYPE_FLOAT:
575 case TGSI_TYPE_UNTYPED:
576 bld_fetch = &bld_base->base;
577 break;
578 case TGSI_TYPE_UNSIGNED:
579 bld_fetch = &bld_base->uint_bld;
580 break;
581 case TGSI_TYPE_SIGNED:
582 bld_fetch = &bld_base->int_bld;
583 break;
584 case TGSI_TYPE_VOID:
585 case TGSI_TYPE_DOUBLE:
586 default:
587 assert(0);
588 bld_fetch = NULL;
589 break;
590 }
591 return bld_fetch;
592 }
593
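/**
 * Fetch a single channel from the constant buffer.  Indirectly addressed
 * registers are gathered element by element; otherwise a scalar load is
 * broadcast across the SoA vector.
 */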
594 static LLVMValueRef
595 emit_fetch_constant(
596 struct lp_build_tgsi_context * bld_base,
597 const struct tgsi_full_src_register * reg,
598 enum tgsi_opcode_type stype,
599 unsigned swizzle)
600 {
601 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
602 struct gallivm_state *gallivm = bld_base->base.gallivm;
603 LLVMBuilderRef builder = gallivm->builder;
604 struct lp_build_context *uint_bld = &bld_base->uint_bld;
605 LLVMValueRef indirect_index = NULL;
606 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
607 unsigned dimension = 0;
608 LLVMValueRef dimension_index;
609 LLVMValueRef consts_ptr;
610
611 /* XXX: Handle fetching xyzw components as a vector */
612 assert(swizzle != ~0);
613
614 if (reg->Register.Dimension) {
615 assert(!reg->Dimension.Indirect);
616 dimension = reg->Dimension.Index;
617 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
618 }
619
620 dimension_index = lp_build_const_int32(gallivm, dimension);
621 consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);
622
623 if (reg->Register.Indirect) {
624 indirect_index = get_indirect_index(bld,
625 reg->Register.File,
626 reg->Register.Index,
627 &reg->Indirect);
628 }
629
630 if (reg->Register.Indirect) {
631 LLVMValueRef swizzle_vec =
632 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
633 LLVMValueRef index_vec; /* index into the const buffer */
634
635 /* index_vec = indirect_index * 4 + swizzle */
636 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
637 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
638
639 /* Gather values from the constant buffer */
640 return build_gather(bld_fetch, consts_ptr, index_vec);
641 }
642 else {
643 LLVMValueRef index; /* index into the const buffer */
644 LLVMValueRef scalar, scalar_ptr;
645
646 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
647
648 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
649 &index, 1, "");
650
651 if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
652 LLVMTypeRef ivtype = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0);
653 LLVMValueRef temp_ptr;
654 temp_ptr = LLVMBuildBitCast(builder, scalar_ptr, ivtype, "");
655 scalar = LLVMBuildLoad(builder, temp_ptr, "");
656 } else
657 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
658
659 return lp_build_broadcast_scalar(bld_fetch, scalar);
660 }
661 }
662
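/**
 * Fetch a single channel of an immediate.  Immediates are built as
 * constant vectors at declaration time, so only a bitcast to the
 * requested type is needed here.
 */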
663 static LLVMValueRef
664 emit_fetch_immediate(
665 struct lp_build_tgsi_context * bld_base,
666 const struct tgsi_full_src_register * reg,
667 enum tgsi_opcode_type stype,
668 unsigned swizzle)
669 {
670 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
671 LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
672 assert(res);
673
674 if (stype == TGSI_TYPE_UNSIGNED) {
675 res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
676 } else if (stype == TGSI_TYPE_SIGNED) {
677 res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
678 }
679 return res;
680 }
681
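/**
 * Fetch a single channel of a shader input, either gathered from the
 * inputs array (indirect addressing) or taken from the per-channel
 * input values.
 */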
682 static LLVMValueRef
683 emit_fetch_input(
684 struct lp_build_tgsi_context * bld_base,
685 const struct tgsi_full_src_register * reg,
686 enum tgsi_opcode_type stype,
687 unsigned swizzle)
688 {
689 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
690 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
691 LLVMBuilderRef builder = gallivm->builder;
692 struct lp_build_context *uint_bld = &bld_base->uint_bld;
693 LLVMValueRef indirect_index = NULL;
694 LLVMValueRef res;
695
696 if (reg->Register.Indirect) {
697 indirect_index = get_indirect_index(bld,
698 reg->Register.File,
699 reg->Register.Index,
700 &reg->Indirect);
701 }
702
703 if (reg->Register.Indirect) {
704 LLVMValueRef swizzle_vec =
705 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
706 LLVMValueRef length_vec =
707 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
708 LLVMValueRef index_vec; /* index into the const buffer */
709 LLVMValueRef inputs_array;
710 LLVMTypeRef float4_ptr_type;
711
712 /* index_vec = (indirect_index * 4 + swizzle) * length */
713 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
714 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
715 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
716
717 /* cast inputs_array pointer to float* */
718 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
719 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
720 float4_ptr_type, "");
721
722 /* Gather values from the temporary register array */
723 res = build_gather(&bld_base->base, inputs_array, index_vec);
724 } else {
725 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
726 LLVMValueRef lindex = lp_build_const_int32(gallivm,
727 reg->Register.Index * 4 + swizzle);
728 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
729 bld->inputs_array, &lindex, 1, "");
730 res = LLVMBuildLoad(builder, input_ptr, "");
731 }
732 else {
733 res = bld->inputs[reg->Register.Index][swizzle];
734 }
735 }
736
737 assert(res);
738
739 if (stype == TGSI_TYPE_UNSIGNED) {
740 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
741 } else if (stype == TGSI_TYPE_SIGNED) {
742 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
743 }
744
745 return res;
746 }
747
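/**
 * Fetch a single channel of a temporary register, gathering from the
 * temps array for indirect addressing or loading the channel's alloca
 * otherwise.
 */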
748 static LLVMValueRef
749 emit_fetch_temporary(
750 struct lp_build_tgsi_context * bld_base,
751 const struct tgsi_full_src_register * reg,
752 enum tgsi_opcode_type stype,
753 unsigned swizzle)
754 {
755 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
756 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
757 LLVMBuilderRef builder = gallivm->builder;
758 struct lp_build_context *uint_bld = &bld_base->uint_bld;
759 LLVMValueRef indirect_index = NULL;
760 LLVMValueRef res;
761
762 if (reg->Register.Indirect) {
763 indirect_index = get_indirect_index(bld,
764 reg->Register.File,
765 reg->Register.Index,
766 &reg->Indirect);
767 }
768
769 if (reg->Register.Indirect) {
770 LLVMValueRef swizzle_vec =
771 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
772 LLVMValueRef length_vec =
773 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
774 bld->bld_base.base.type.length);
775 LLVMValueRef index_vec; /* index into the const buffer */
776 LLVMValueRef temps_array;
777 LLVMTypeRef float4_ptr_type;
778
779 /* index_vec = (indirect_index * 4 + swizzle) * length */
780 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
781 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
782 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
783
784 /* cast temps_array pointer to float* */
785 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
786 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
787 float4_ptr_type, "");
788
789 /* Gather values from the temporary register array */
790 res = build_gather(&bld_base->base, temps_array, index_vec);
791 }
792 else {
793 LLVMValueRef temp_ptr;
794 if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
795 LLVMTypeRef itype = LLVMPointerType(bld->bld_base.int_bld.vec_type, 0);
796 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
797 swizzle);
798 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, itype, "");
799 } else
800 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
801 res = LLVMBuildLoad(builder, temp_ptr, "");
802 if (!res)
803 return bld->bld_base.base.undef;
804 }
805
806 return res;
807 }
808
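/**
 * Fetch a system value (instance id, vertex id, ...) and bitcast it to
 * the type expected by the instruction.
 */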
809 static LLVMValueRef
810 emit_fetch_system_value(
811 struct lp_build_tgsi_context * bld_base,
812 const struct tgsi_full_src_register * reg,
813 enum tgsi_opcode_type stype,
814 unsigned swizzle)
815 {
816 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
817 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
818 const struct tgsi_shader_info *info = bld->bld_base.info;
819 LLVMBuilderRef builder = gallivm->builder;
820 LLVMValueRef res;
821    enum tgsi_opcode_type atype; /* actual type of the value */
822
823 assert(!reg->Register.Indirect);
824
825 switch (info->system_value_semantic_name[reg->Register.Index]) {
826 case TGSI_SEMANTIC_INSTANCEID:
827 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
828 atype = TGSI_TYPE_UNSIGNED;
829 break;
830
831 case TGSI_SEMANTIC_VERTEXID:
832 res = bld->system_values.vertex_id;
833 atype = TGSI_TYPE_UNSIGNED;
834 break;
835
836 default:
837 assert(!"unexpected semantic in emit_fetch_system_value");
838 res = bld_base->base.zero;
839 atype = TGSI_TYPE_FLOAT;
840 break;
841 }
842
843 if (atype != stype) {
844 if (stype == TGSI_TYPE_FLOAT) {
845 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
846 } else if (stype == TGSI_TYPE_UNSIGNED) {
847 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
848 } else if (stype == TGSI_TYPE_SIGNED) {
849 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
850 }
851 }
852
853 return res;
854 }
855
856 /**
857 * Register fetch with derivatives.
858 */
859 static void
860 emit_fetch_deriv(
861 struct lp_build_tgsi_soa_context *bld,
862 LLVMValueRef src,
863 LLVMValueRef *res,
864 LLVMValueRef *ddx,
865 LLVMValueRef *ddy)
866 {
867 if(res)
868 *res = src;
869
870 /* TODO: use interpolation coeffs for inputs */
871
872 if(ddx)
873 *ddx = lp_build_ddx(&bld->bld_base.base, src);
874
875 if(ddy)
876 *ddy = lp_build_ddy(&bld->bld_base.base, src);
877 }
878
879
880 /**
881 * Build per-channel predicate masks for a predicated instruction.
882 */
883 static void
884 emit_fetch_predicate(
885 struct lp_build_tgsi_soa_context *bld,
886 const struct tgsi_full_instruction *inst,
887 LLVMValueRef *pred)
888 {
889 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
890 unsigned index;
891 unsigned char swizzles[4];
892 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
893 LLVMValueRef value;
894 unsigned chan;
895
896 if (!inst->Instruction.Predicate) {
897 TGSI_FOR_EACH_CHANNEL( chan ) {
898 pred[chan] = NULL;
899 }
900 return;
901 }
902
903 swizzles[0] = inst->Predicate.SwizzleX;
904 swizzles[1] = inst->Predicate.SwizzleY;
905 swizzles[2] = inst->Predicate.SwizzleZ;
906 swizzles[3] = inst->Predicate.SwizzleW;
907
908 index = inst->Predicate.Index;
909 assert(index < LP_MAX_TGSI_PREDS);
910
911 TGSI_FOR_EACH_CHANNEL( chan ) {
912 unsigned swizzle = swizzles[chan];
913
914 /*
915 * Only fetch the predicate register channels that are actually listed
916 * in the swizzles
917 */
918 if (!unswizzled[swizzle]) {
919 value = LLVMBuildLoad(builder,
920 bld->preds[index][swizzle], "");
921
922 /*
923 * Convert the value to an integer mask.
924 *
925 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
926 * is needlessly causing two comparisons due to storing the intermediate
927 * result as float vector instead of an integer mask vector.
928 */
929 value = lp_build_compare(bld->bld_base.base.gallivm,
930 bld->bld_base.base.type,
931 PIPE_FUNC_NOTEQUAL,
932 value,
933 bld->bld_base.base.zero);
934 if (inst->Predicate.Negate) {
935 value = LLVMBuildNot(builder, value, "");
936 }
937
938 unswizzled[swizzle] = value;
939 } else {
940 value = unswizzled[swizzle];
941 }
942
943 pred[chan] = value;
944 }
945 }
946
947 /**
948 * Register store.
949 */
950 static void
951 emit_store_chan(
952 struct lp_build_tgsi_context *bld_base,
953 const struct tgsi_full_instruction *inst,
954 unsigned index,
955 unsigned chan_index,
956 LLVMValueRef pred,
957 LLVMValueRef value)
958 {
959 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
960 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
961 LLVMBuilderRef builder = gallivm->builder;
962 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
963 struct lp_build_context *uint_bld = &bld_base->uint_bld;
964 LLVMValueRef indirect_index = NULL;
965 struct lp_build_context *bld_store;
966 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
967
968 switch (dtype) {
969 default:
970 case TGSI_TYPE_FLOAT:
971 case TGSI_TYPE_UNTYPED:
972 bld_store = &bld_base->base;
973 break;
974 case TGSI_TYPE_UNSIGNED:
975 bld_store = &bld_base->uint_bld;
976 break;
977 case TGSI_TYPE_SIGNED:
978 bld_store = &bld_base->int_bld;
979 break;
980 case TGSI_TYPE_DOUBLE:
981 case TGSI_TYPE_VOID:
982 assert(0);
983 bld_store = NULL;
984 break;
985 }
986
987 switch( inst->Instruction.Saturate ) {
988 case TGSI_SAT_NONE:
989 break;
990
991 case TGSI_SAT_ZERO_ONE:
992 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
993 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
994 break;
995
996 case TGSI_SAT_MINUS_PLUS_ONE:
997 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
998 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
999 break;
1000
1001 default:
1002 assert(0);
1003 }
1004
1005 if (reg->Register.Indirect) {
1006 indirect_index = get_indirect_index(bld,
1007 reg->Register.File,
1008 reg->Register.Index,
1009 &reg->Indirect);
1010 } else {
1011 assert(reg->Register.Index <=
1012 bld->bld_base.info->file_max[reg->Register.File]);
1013 }
1014
1015 switch( reg->Register.File ) {
1016 case TGSI_FILE_OUTPUT:
1017 if (reg->Register.Indirect) {
1018 LLVMValueRef chan_vec =
1019 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1020 LLVMValueRef length_vec =
1021 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
1022 LLVMValueRef index_vec; /* indexes into the temp registers */
1023 LLVMValueRef outputs_array;
1024 LLVMValueRef pixel_offsets;
1025 LLVMTypeRef float_ptr_type;
1026 int i;
1027
1028 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1029 pixel_offsets = uint_bld->undef;
1030 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1031 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1032 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1033 ii, ii, "");
1034 }
1035
1036 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1037 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1038 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1039 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1040 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1041
1042 float_ptr_type =
1043 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1044 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
1045 float_ptr_type, "");
1046
1047          /* Scatter store values into the output registers */
1048 emit_mask_scatter(bld, outputs_array, index_vec, value,
1049 &bld->exec_mask, pred);
1050 }
1051 else {
1052 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1053 chan_index);
1054 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
1055 }
1056 break;
1057
1058 case TGSI_FILE_TEMPORARY:
1059 if (reg->Register.Indirect) {
1060 LLVMValueRef chan_vec =
1061 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1062 LLVMValueRef length_vec =
1063 lp_build_const_int_vec(gallivm, uint_bld->type,
1064 bld->bld_base.base.type.length);
1065 LLVMValueRef index_vec; /* indexes into the temp registers */
1066 LLVMValueRef temps_array;
1067 LLVMValueRef pixel_offsets;
1068 LLVMTypeRef float_ptr_type;
1069 int i;
1070
1071 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1072 pixel_offsets = uint_bld->undef;
1073 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1074 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1075 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1076 ii, ii, "");
1077 }
1078
1079 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1080 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1081 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1082 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1083 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1084
1085 float_ptr_type =
1086 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1087 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1088 float_ptr_type, "");
1089
1090 /* Scatter store values into temp registers */
1091 emit_mask_scatter(bld, temps_array, index_vec, value,
1092 &bld->exec_mask, pred);
1093 }
1094 else {
1095 LLVMValueRef temp_ptr;
1096
1097 switch (dtype) {
1098 case TGSI_TYPE_UNSIGNED:
1099 case TGSI_TYPE_SIGNED: {
1100 LLVMTypeRef itype = bld_base->int_bld.vec_type;
1101 LLVMTypeRef ivtype = LLVMPointerType(itype, 0);
1102 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1103 chan_index);
1104 LLVMValueRef temp_value_ptr;
1105
1106 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, "");
1107 temp_value_ptr = LLVMBuildBitCast(builder, value, itype, "");
1108 value = temp_value_ptr;
1109 break;
1110 }
1111 default:
1112 case TGSI_TYPE_FLOAT:
1113 case TGSI_TYPE_UNTYPED:
1114 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1115 chan_index);
1116 break;
1117 }
1118
1119 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr);
1120 }
1121 break;
1122
1123 case TGSI_FILE_ADDRESS:
1124 assert(dtype == TGSI_TYPE_SIGNED);
1125 assert(LLVMTypeOf(value) == bld_base->base.int_vec_type);
1126 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1127 bld->addr[reg->Register.Index][chan_index]);
1128 break;
1129
1130 case TGSI_FILE_PREDICATE:
1131 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1132 bld->preds[reg->Register.Index][chan_index]);
1133 break;
1134
1135 default:
1136 assert( 0 );
1137 }
1138 }
1139
1140 static void
1141 emit_store(
1142 struct lp_build_tgsi_context * bld_base,
1143 const struct tgsi_full_instruction * inst,
1144 const struct tgsi_opcode_info * info,
1145 LLVMValueRef dst[4])
1146
1147 {
1148 unsigned chan_index;
1149 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1150
1151 if(info->num_dst) {
1152 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1153
1154 emit_fetch_predicate( bld, inst, pred );
1155
1156 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1157 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1158 }
1159 }
1160 }
1161
1162 /**
1163 * High-level instruction translators.
1164 */
1165
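/**
 * Translate the classic TEX-style opcodes: gather coordinates, LOD
 * bias/explicit LOD, projection and derivatives as dictated by the
 * modifier, then hand everything to the sampler code generator.
 */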
1166 static void
1167 emit_tex( struct lp_build_tgsi_soa_context *bld,
1168 const struct tgsi_full_instruction *inst,
1169 enum lp_build_tex_modifier modifier,
1170 LLVMValueRef *texel)
1171 {
1172 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1173 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1174 unsigned unit;
1175 LLVMValueRef lod_bias, explicit_lod;
1176 LLVMValueRef oow = NULL;
1177 LLVMValueRef coords[4];
1178 LLVMValueRef offsets[3] = { NULL };
1179 struct lp_derivatives derivs;
1180 unsigned num_coords;
1181 unsigned dims;
1182 unsigned i;
1183
1184 if (!bld->sampler) {
1185 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1186 for (i = 0; i < 4; i++) {
1187 texel[i] = bld->bld_base.base.undef;
1188 }
1189 return;
1190 }
1191
1192 derivs.ddx_ddy[0] = bld->bld_base.base.undef;
1193 derivs.ddx_ddy[1] = bld->bld_base.base.undef;
1194
1195 switch (inst->Texture.Texture) {
1196 case TGSI_TEXTURE_1D:
1197 num_coords = 1;
1198 dims = 1;
1199 break;
1200 case TGSI_TEXTURE_1D_ARRAY:
1201 num_coords = 2;
1202 dims = 1;
1203 break;
1204 case TGSI_TEXTURE_2D:
1205 case TGSI_TEXTURE_RECT:
1206 num_coords = 2;
1207 dims = 2;
1208 break;
1209 case TGSI_TEXTURE_SHADOW1D:
1210 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1211 num_coords = 3;
1212 dims = 1;
1213 break;
1214 case TGSI_TEXTURE_SHADOW2D:
1215 case TGSI_TEXTURE_SHADOWRECT:
1216 case TGSI_TEXTURE_2D_ARRAY:
1217 case TGSI_TEXTURE_CUBE:
1218 num_coords = 3;
1219 dims = 2;
1220 break;
1221 case TGSI_TEXTURE_3D:
1222 num_coords = 3;
1223 dims = 3;
1224 break;
1225 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1226 case TGSI_TEXTURE_SHADOWCUBE:
1227 num_coords = 4;
1228 dims = 2;
1229 break;
1230 default:
1231 assert(0);
1232 return;
1233 }
1234
1235 /* Note lod and especially projected are illegal in a LOT of cases */
1236 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1237 assert(num_coords < 4);
1238 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1239 explicit_lod = NULL;
1240 }
1241 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1242 assert(num_coords < 4);
1243 lod_bias = NULL;
1244 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1245 }
1246 else {
1247 lod_bias = NULL;
1248 explicit_lod = NULL;
1249 }
1250
1251 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1252 assert(num_coords < 4);
1253 oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1254 oow = lp_build_rcp(&bld->bld_base.base, oow);
1255 }
1256
1257 for (i = 0; i < num_coords; i++) {
1258 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1259 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1260 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1261 }
1262 for (i = num_coords; i < 4; i++) {
1263 coords[i] = bld->bld_base.base.undef;
1264 }
1265
1266 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1267 LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
1268 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
1269 LLVMValueRef ddxdyonec[3];
1270 unsigned length = bld->bld_base.base.type.length;
1271 unsigned num_quads = length / 4;
1272 unsigned dim;
1273 unsigned quad;
1274
1275 for (dim = 0; dim < dims; ++dim) {
1276 LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
1277 LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
1278 for (quad = 0; quad < num_quads; ++quad) {
1279 unsigned s1 = 4*quad;
1280 unsigned s2 = 4*quad + length;
1281 shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
1282 shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
1283 shuffles[4*quad + 2] = i32undef;
1284 shuffles[4*quad + 3] = i32undef;
1285 }
1286 ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
1287 LLVMConstVector(shuffles, length), "");
1288 }
1289 if (dims == 1) {
1290 derivs.ddx_ddy[0] = ddxdyonec[0];
1291 }
1292 else if (dims >= 2) {
1293 for (quad = 0; quad < num_quads; ++quad) {
1294 unsigned s1 = 4*quad;
1295 unsigned s2 = 4*quad + length;
1296 shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
1297 shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
1298 shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
1299 shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
1300 }
1301 derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1],
1302 LLVMConstVector(shuffles, length), "");
1303 if (dims == 3) {
1304 derivs.ddx_ddy[1] = ddxdyonec[2];
1305 }
1306 }
1307 unit = inst->Src[3].Register.Index;
1308 } else {
1309 if (dims == 1) {
1310 derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
1311 }
1312 else if (dims >= 2) {
1313 derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
1314 coords[0], coords[1]);
1315 if (dims == 3) {
1316 derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
1317 }
1318 }
1319 unit = inst->Src[1].Register.Index;
1320 }
1321
1322 /* some advanced gather instructions (txgo) would require 4 offsets */
1323 if (inst->Texture.NumOffsets == 1) {
1324 unsigned dim;
1325 for (dim = 0; dim < dims; dim++) {
1326 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1327 }
1328 }
1329
1330 bld->sampler->emit_fetch_texel(bld->sampler,
1331 bld->bld_base.base.gallivm,
1332 bld->bld_base.base.type,
1333 FALSE,
1334 unit, unit,
1335 coords,
1336 offsets,
1337 &derivs,
1338 lod_bias, explicit_lod,
1339 texel);
1340 }
1341
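/**
 * Translate the DX10-style SAMPLE opcodes, where the texture and sampler
 * units come from separate source registers (src1 and src2).
 */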
1342 static void
1343 emit_sample(struct lp_build_tgsi_soa_context *bld,
1344 const struct tgsi_full_instruction *inst,
1345 enum lp_build_tex_modifier modifier,
1346 LLVMValueRef *texel)
1347 {
1348 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1349 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1350 unsigned texture_unit, sampler_unit;
1351 LLVMValueRef lod_bias, explicit_lod;
1352 LLVMValueRef coords[4];
1353 LLVMValueRef offsets[3] = { NULL };
1354 struct lp_derivatives derivs;
1355 unsigned num_coords, dims;
1356 unsigned i;
1357 boolean compare = FALSE;
1358
1359 if (!bld->sampler) {
1360 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1361 for (i = 0; i < 4; i++) {
1362 texel[i] = bld->bld_base.base.undef;
1363 }
1364 return;
1365 }
1366
1367 derivs.ddx_ddy[0] = bld->bld_base.base.undef;
1368 derivs.ddx_ddy[1] = bld->bld_base.base.undef;
1369
1370 switch (inst->Texture.Texture) {
1371 case TGSI_TEXTURE_SHADOW1D:
1372 compare = TRUE;
1373 /* Fallthrough */
1374 case TGSI_TEXTURE_1D:
1375 num_coords = 1;
1376 dims = 1;
1377 break;
1378 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1379 compare = TRUE;
1380 /* Fallthrough */
1381 case TGSI_TEXTURE_1D_ARRAY:
1382 num_coords = 2;
1383 dims = 1;
1384 break;
1385 case TGSI_TEXTURE_SHADOW2D:
1386 case TGSI_TEXTURE_SHADOWRECT:
1387 compare = TRUE;
1388 /* Fallthrough */
1389 case TGSI_TEXTURE_2D:
1390 case TGSI_TEXTURE_RECT:
1391 num_coords = 2;
1392 dims = 2;
1393 break;
1394 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1395 case TGSI_TEXTURE_SHADOWCUBE:
1396 compare = TRUE;
1397 /* Fallthrough */
1398 case TGSI_TEXTURE_2D_ARRAY:
1399 case TGSI_TEXTURE_CUBE:
1400 num_coords = 3;
1401 dims = 2;
1402 break;
1403 case TGSI_TEXTURE_3D:
1404 num_coords = 3;
1405 dims = 3;
1406 break;
1407 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1408 compare = TRUE;
1409 /* Fallthrough */
1410 case TGSI_TEXTURE_CUBE_ARRAY:
1411 num_coords = 4;
1412 dims = 3;
1413 break;
1414 default:
1415 assert(0);
1416 return;
1417 }
1418
1419 /*
1420     * Unlike old-style tex opcodes, the texture/sampler indices
1421 * always come from src1 and src2 respectively.
1422 */
1423 texture_unit = inst->Src[1].Register.Index;
1424 sampler_unit = inst->Src[2].Register.Index;
1425
1426 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1427 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1428 explicit_lod = NULL;
1429 }
1430 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1431 /* lod bias comes from src 3.r but explicit lod from 0.a */
1432 lod_bias = NULL;
1433 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1434 }
1435 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
1436 lod_bias = NULL;
1437 /* XXX might be better to explicitly pass the level zero information */
1438 explicit_lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
1439 }
1440 else {
1441 lod_bias = NULL;
1442 explicit_lod = NULL;
1443 }
1444
1445 for (i = 0; i < num_coords; i++) {
1446 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1447 }
1448 for (i = num_coords; i < 4; i++) {
1449 coords[i] = bld->bld_base.base.undef;
1450 }
1451 /*
1452 * XXX: whack shadow comparison value into place.
1453 * Should probably fix the interface for separate value
1454 * (it will not work for cube arrays if it is part of coords).
1455 */
1456 if (compare) {
1457 unsigned c_coord = num_coords > 2 ? 3 : 2;
1458 assert(num_coords < 4);
1459 coords[c_coord] = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
1460 }
1461
1462 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1463 LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
1464 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
1465 LLVMValueRef ddxdyonec[3];
1466 unsigned length = bld->bld_base.base.type.length;
1467 unsigned num_quads = length / 4;
1468 unsigned dim;
1469 unsigned quad;
1470
1471 for (dim = 0; dim < dims; ++dim) {
1472 LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
1473 LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
1474 for (quad = 0; quad < num_quads; ++quad) {
1475 unsigned s1 = 4*quad;
1476 unsigned s2 = 4*quad + length;
1477 shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
1478 shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
1479 shuffles[4*quad + 2] = i32undef;
1480 shuffles[4*quad + 3] = i32undef;
1481 }
1482 ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
1483 LLVMConstVector(shuffles, length), "");
1484 }
1485 if (dims == 1) {
1486 derivs.ddx_ddy[0] = ddxdyonec[0];
1487 }
1488 else if (dims >= 2) {
1489 for (quad = 0; quad < num_quads; ++quad) {
1490 unsigned s1 = 4*quad;
1491 unsigned s2 = 4*quad + length;
1492 shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
1493 shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
1494 shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
1495 shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
1496 }
1497 derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1],
1498 LLVMConstVector(shuffles, length), "");
1499 if (dims == 3) {
1500 derivs.ddx_ddy[1] = ddxdyonec[2];
1501 }
1502 }
1503 } else {
1504 if (dims == 1) {
1505 derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
1506 }
1507 else if (dims >= 2) {
1508 derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
1509 coords[0], coords[1]);
1510 if (dims == 3) {
1511 derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
1512 }
1513 }
1514 }
1515
1516 /* some advanced gather instructions (txgo) would require 4 offsets */
1517 if (inst->Texture.NumOffsets == 1) {
1518 unsigned dim;
1519 for (dim = 0; dim < dims; dim++) {
1520 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1521 }
1522 }
1523
1524 bld->sampler->emit_fetch_texel(bld->sampler,
1525 bld->bld_base.base.gallivm,
1526 bld->bld_base.base.type,
1527 FALSE,
1528 texture_unit, sampler_unit,
1529 coords,
1530 offsets,
1531 &derivs,
1532 lod_bias, explicit_lod,
1533 texel);
1534 }
1535
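/**
 * Translate TXF (texel fetch): integer coordinates and an explicit lod
 * (except for buffer textures), with no filtering, lod bias or
 * derivatives.
 */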
1536 static void
1537 emit_txf( struct lp_build_tgsi_soa_context *bld,
1538 const struct tgsi_full_instruction *inst,
1539 LLVMValueRef *texel)
1540 {
1541 unsigned unit;
1542 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
1543 LLVMValueRef explicit_lod = NULL;
1544 LLVMValueRef coords[3];
1545 LLVMValueRef offsets[3] = { NULL };
1546 struct lp_derivatives derivs;
1547 unsigned num_coords;
1548 unsigned dims;
1549 unsigned i;
1550
1551 if (!bld->sampler) {
1552 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1553 for (i = 0; i < 4; i++) {
1554 texel[i] = coord_undef;
1555 }
1556 return;
1557 }
1558
1559 derivs.ddx_ddy[0] = coord_undef;
1560 derivs.ddx_ddy[1] = coord_undef;
1561
1562 switch (inst->Texture.Texture) {
1563 case TGSI_TEXTURE_1D:
1564 case TGSI_TEXTURE_BUFFER:
1565 num_coords = 1;
1566 dims = 1;
1567 break;
1568 case TGSI_TEXTURE_1D_ARRAY:
1569 num_coords = 2;
1570 dims = 1;
1571 break;
1572 case TGSI_TEXTURE_2D:
1573 case TGSI_TEXTURE_RECT:
1574 num_coords = 2;
1575 dims = 2;
1576 break;
1577 case TGSI_TEXTURE_2D_ARRAY:
1578 num_coords = 3;
1579 dims = 2;
1580 break;
1581 case TGSI_TEXTURE_3D:
1582 num_coords = 3;
1583 dims = 3;
1584 break;
1585 default:
1586 assert(0);
1587 return;
1588 }
1589
1590    /* always have lod except for buffers? */
1591 if (inst->Texture.Texture != TGSI_TEXTURE_BUFFER) {
1592 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1593 }
1594
1595 for (i = 0; i < num_coords; i++) {
1596 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1597 }
1598 for (i = num_coords; i < 3; i++) {
1599 coords[i] = coord_undef;
1600 }
1601
1602 unit = inst->Src[1].Register.Index;
1603
1604 if (inst->Texture.NumOffsets == 1) {
1605 unsigned dim;
1606 for (dim = 0; dim < dims; dim++) {
1607 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1608 }
1609 }
1610
1611 bld->sampler->emit_fetch_texel(bld->sampler,
1612 bld->bld_base.base.gallivm,
1613 bld->bld_base.base.type,
1614 TRUE,
1615 unit, unit,
1616 coords,
1617 offsets,
1618 &derivs,
1619 NULL, explicit_lod,
1620 texel);
1621 }
1622
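/**
 * Translate texture size queries (TXQ and the DX10 SVIEWINFO opcode).
 * The explicit LOD argument is only fetched for texture types that can
 * have mip levels.
 */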
1623 static void
1624 emit_size_query( struct lp_build_tgsi_soa_context *bld,
1625 const struct tgsi_full_instruction *inst,
1626 LLVMValueRef *sizes_out,
1627 boolean is_sviewinfo)
1628 {
1629 LLVMValueRef explicit_lod;
1630 unsigned has_lod;
1631 unsigned i;
1632
1633 switch (inst->Texture.Texture) {
1634 case TGSI_TEXTURE_BUFFER:
1635 case TGSI_TEXTURE_RECT:
1636 case TGSI_TEXTURE_SHADOWRECT:
1637 has_lod = 0;
1638 break;
1639 default:
1640 has_lod = 1;
1641 break;
1642 }
1643
1644 if (!bld->sampler) {
1645 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
1646 for (i = 0; i < 4; i++)
1647 sizes_out[i] = bld->bld_base.int_bld.undef;
1648 return;
1649 }
1650
1651 if (has_lod)
1652 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 0 );
1653 else
1654 explicit_lod = NULL;
1655
1656 bld->sampler->emit_size_query(bld->sampler,
1657 bld->bld_base.base.gallivm,
1658 bld->bld_base.int_bld.type,
1659 inst->Src[1].Register.Index,
1660 is_sviewinfo,
1661 explicit_lod,
1662 sizes_out);
1663 }
1664
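/**
 * Heuristic: return TRUE if none of the next few instructions is a
 * texture fetch, call or control-flow opcode (or the shader ends first),
 * so the mask check after a kill can be skipped.
 */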
1665 static boolean
1666 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1667 int pc)
1668 {
1669 int i;
1670
1671 for (i = 0; i < 5; i++) {
1672 unsigned opcode;
1673
1674 if (pc + i >= bld->bld_base.info->num_instructions)
1675 return TRUE;
1676
1677 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
1678
1679 if (opcode == TGSI_OPCODE_END)
1680 return TRUE;
1681
1682 if (opcode == TGSI_OPCODE_TEX ||
1683 opcode == TGSI_OPCODE_TXP ||
1684 opcode == TGSI_OPCODE_TXD ||
1685 opcode == TGSI_OPCODE_TXB ||
1686 opcode == TGSI_OPCODE_TXL ||
1687 opcode == TGSI_OPCODE_TXF ||
1688 opcode == TGSI_OPCODE_TXQ ||
1689 opcode == TGSI_OPCODE_CAL ||
1690 opcode == TGSI_OPCODE_CALLNZ ||
1691 opcode == TGSI_OPCODE_IF ||
1692 opcode == TGSI_OPCODE_IFC ||
1693 opcode == TGSI_OPCODE_BGNLOOP ||
1694 opcode == TGSI_OPCODE_SWITCH)
1695 return FALSE;
1696 }
1697
1698 return TRUE;
1699 }
1700
1701
1702
1703 /**
1704 * Kill fragment if any of the src register values are negative.
1705 */
1706 static void
1707 emit_kil(
1708 struct lp_build_tgsi_soa_context *bld,
1709 const struct tgsi_full_instruction *inst,
1710 int pc)
1711 {
1712 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1713 const struct tgsi_full_src_register *reg = &inst->Src[0];
1714 LLVMValueRef terms[TGSI_NUM_CHANNELS];
1715 LLVMValueRef mask;
1716 unsigned chan_index;
1717
1718 memset(&terms, 0, sizeof terms);
1719
1720 TGSI_FOR_EACH_CHANNEL( chan_index ) {
1721 unsigned swizzle;
1722
1723 /* Unswizzle channel */
1724 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1725
1726 /* Check if the component has not been already tested. */
1727 assert(swizzle < TGSI_NUM_CHANNELS);
1728 if( !terms[swizzle] )
1729 /* TODO: change the comparison operator instead of setting the sign */
1730 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
1731 }
1732
1733 mask = NULL;
1734 TGSI_FOR_EACH_CHANNEL( chan_index ) {
1735 if(terms[chan_index]) {
1736 LLVMValueRef chan_mask;
1737
1738 /*
1739 * If term < 0 then mask = 0 else mask = ~0.
1740 */
1741 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
1742
1743 if(mask)
1744 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1745 else
1746 mask = chan_mask;
1747 }
1748 }
1749
1750 if(mask) {
1751 lp_build_mask_update(bld->mask, mask);
1752
1753 if (!near_end_of_shader(bld, pc))
1754 lp_build_mask_check(bld->mask);
1755 }
1756 }
1757
1758
1759 /**
1760 * Predicated fragment kill.
1761 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1762 * The only predication is the execution mask which will apply if
1763 * we're inside a loop or conditional.
1764 */
1765 static void
1766 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1767 int pc)
1768 {
1769 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1770 LLVMValueRef mask;
1771
1772 /* For those channels which are "alive", disable fragment shader
1773 * execution.
1774 */
1775 if (bld->exec_mask.has_mask) {
1776 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1777 }
1778 else {
1779 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
1780 mask = zero;
1781 }
1782
1783 lp_build_mask_update(bld->mask, mask);
1784
1785 if (!near_end_of_shader(bld, pc))
1786 lp_build_mask_check(bld->mask);
1787 }
1788
1789
1790 /**
1791 * Emit code which will dump the value of all the temporary registers
1792 * to stdout.
1793 */
1794 static void
1795 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1796 {
1797 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1798 LLVMBuilderRef builder = gallivm->builder;
1799 LLVMValueRef temp_ptr;
1800 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1801 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1802 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1803 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1804 int index;
1805 int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
1806
1807 for (index = 0; index < n; index++) {
1808 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1809 LLVMValueRef v[4][4], res;
1810 int chan;
1811
1812 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1813
1814 for (chan = 0; chan < 4; chan++) {
1815 temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
1816 res = LLVMBuildLoad(builder, temp_ptr, "");
1817 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1818 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1819 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1820 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1821 }
1822
1823 lp_build_printf(gallivm, " X: %f %f %f %f\n",
1824 v[0][0], v[0][1], v[0][2], v[0][3]);
1825 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
1826 v[1][0], v[1][1], v[1][2], v[1][3]);
1827 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
1828 v[2][0], v[2][1], v[2][2], v[2][3]);
1829 lp_build_printf(gallivm, " W: %f %f %f %f\n",
1830 v[3][0], v[3][1], v[3][2], v[3][3]);
1831 }
1832 }
1833
1834
1835
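/**
 * Allocate storage for declared temporaries, outputs, address and
 * predicate registers.  Temporary and output files that are indirectly
 * addressed use the array allocations instead of per-channel allocas.
 */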
1836 void
1837 lp_emit_declaration_soa(
1838 struct lp_build_tgsi_context *bld_base,
1839 const struct tgsi_full_declaration *decl)
1840 {
1841 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1842 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1843 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1844 const unsigned first = decl->Range.First;
1845 const unsigned last = decl->Range.Last;
1846 unsigned idx, i;
1847
1848 for (idx = first; idx <= last; ++idx) {
1849 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
1850 switch (decl->Declaration.File) {
1851 case TGSI_FILE_TEMPORARY:
1852 assert(idx < LP_MAX_TGSI_TEMPS);
1853 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1854 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1855 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1856 }
1857 break;
1858
1859 case TGSI_FILE_OUTPUT:
1860 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1861 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1862 bld->outputs[idx][i] = lp_build_alloca(gallivm,
1863 vec_type, "output");
1864 }
1865 break;
1866
1867 case TGSI_FILE_ADDRESS:
1868          /* ADDR registers are the only ones allocated with an integer LLVM IR type,
1869           * as they are guaranteed to always hold integers.
1870 * XXX: Not sure if this exception is worthwhile (or the whole idea of
1871 * an ADDR register for that matter).
1872 */
1873 assert(idx < LP_MAX_TGSI_ADDRS);
1874 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1875 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
1876 break;
1877
1878 case TGSI_FILE_PREDICATE:
1879 assert(idx < LP_MAX_TGSI_PREDS);
1880 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1881 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1882 "predicate");
1883 break;
1884
1885 default:
1886 /* don't need to declare other vars */
1887 break;
1888 }
1889 }
1890 }
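/*
 * As an example, a declaration such as
 *    DCL TEMP[0..1]
 * (with temporaries not indirectly addressed) results in eight allocas
 * here: two registers times TGSI_NUM_CHANNELS, each holding one SoA
 * vector.  When a file is indirectly addressed, its registers instead
 * live in the flat arrays set up by emit_prologue().
 */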
1891
1892
1893 void lp_emit_immediate_soa(
1894 struct lp_build_tgsi_context *bld_base,
1895 const struct tgsi_full_immediate *imm)
1896 {
1897 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1898 struct gallivm_state * gallivm = bld_base->base.gallivm;
1899
1900 /* simply copy the immediate values into the next immediates[] slot */
1901 unsigned i;
1902 const uint size = imm->Immediate.NrTokens - 1;
1903 assert(size <= 4);
1904 assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
1905 switch (imm->Immediate.DataType) {
1906 case TGSI_IMM_FLOAT32:
1907 for( i = 0; i < size; ++i )
1908 bld->immediates[bld->num_immediates][i] =
1909 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
1910
1911 break;
1912 case TGSI_IMM_UINT32:
1913 for( i = 0; i < size; ++i ) {
1914 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
1915 bld->immediates[bld->num_immediates][i] =
1916 LLVMConstBitCast(tmp, bld_base->base.vec_type);
1917 }
1918
1919 break;
1920 case TGSI_IMM_INT32:
1921 for( i = 0; i < size; ++i ) {
1922 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
1923 bld->immediates[bld->num_immediates][i] =
1924 LLVMConstBitCast(tmp, bld_base->base.vec_type);
1925 }
1926
1927 break;
1928 }
1929 for( i = size; i < 4; ++i )
1930 bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
1931
1932 bld->num_immediates++;
1933 }
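/*
 * For instance, an immediate such as
 *    IMM[0] FLT32 { 0.5000, 1.0000, 0.0000, 0.0000 }
 * is stored as four constant vectors with each scalar replicated across
 * all SoA lanes; UINT32/INT32 immediates are built in the matching
 * integer type and bitcast to the float vector type, and channels not
 * covered by NrTokens are filled with undef.
 */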
1934
1935 static void
1936 ddx_emit(
1937 const struct lp_build_tgsi_action * action,
1938 struct lp_build_tgsi_context * bld_base,
1939 struct lp_build_emit_data * emit_data)
1940 {
1941 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1942
1943 emit_fetch_deriv(bld, emit_data->args[0], NULL,
1944 &emit_data->output[emit_data->chan], NULL);
1945 }
1946
1947 static void
1948 ddy_emit(
1949 const struct lp_build_tgsi_action * action,
1950 struct lp_build_tgsi_context * bld_base,
1951 struct lp_build_emit_data * emit_data)
1952 {
1953 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1954
1955 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
1956 &emit_data->output[emit_data->chan]);
1957 }
1958
1959 static void
1960 kilp_emit(
1961 const struct lp_build_tgsi_action * action,
1962 struct lp_build_tgsi_context * bld_base,
1963 struct lp_build_emit_data * emit_data)
1964 {
1965 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1966
1967 emit_kilp(bld, bld_base->pc - 1);
1968 }
1969
1970 static void
1971 kil_emit(
1972 const struct lp_build_tgsi_action * action,
1973 struct lp_build_tgsi_context * bld_base,
1974 struct lp_build_emit_data * emit_data)
1975 {
1976 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1977
1978 emit_kil(bld, emit_data->inst, bld_base->pc - 1);
1979 }
1980
1981 static void
1982 tex_emit(
1983 const struct lp_build_tgsi_action * action,
1984 struct lp_build_tgsi_context * bld_base,
1985 struct lp_build_emit_data * emit_data)
1986 {
1987 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1988
1989 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
1990 }
1991
1992 static void
1993 txb_emit(
1994 const struct lp_build_tgsi_action * action,
1995 struct lp_build_tgsi_context * bld_base,
1996 struct lp_build_emit_data * emit_data)
1997 {
1998 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1999
2000 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2001 emit_data->output);
2002 }
2003
2004 static void
2005 txd_emit(
2006 const struct lp_build_tgsi_action * action,
2007 struct lp_build_tgsi_context * bld_base,
2008 struct lp_build_emit_data * emit_data)
2009 {
2010 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2011
2012 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2013 emit_data->output);
2014 }
2015
2016 static void
2017 txl_emit(
2018 const struct lp_build_tgsi_action * action,
2019 struct lp_build_tgsi_context * bld_base,
2020 struct lp_build_emit_data * emit_data)
2021 {
2022 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2023
2024 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2025 emit_data->output);
2026 }
2027
2028 static void
2029 txp_emit(
2030 const struct lp_build_tgsi_action * action,
2031 struct lp_build_tgsi_context * bld_base,
2032 struct lp_build_emit_data * emit_data)
2033 {
2034 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2035
2036 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
2037 emit_data->output);
2038 }
2039
2040 static void
2041 txq_emit(
2042 const struct lp_build_tgsi_action * action,
2043 struct lp_build_tgsi_context * bld_base,
2044 struct lp_build_emit_data * emit_data)
2045 {
2046 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2047
2048 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
2049 }
2050
2051 static void
2052 txf_emit(
2053 const struct lp_build_tgsi_action * action,
2054 struct lp_build_tgsi_context * bld_base,
2055 struct lp_build_emit_data * emit_data)
2056 {
2057 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2058
2059 emit_txf(bld, emit_data->inst, emit_data->output);
2060 }
2061
2062 static void
2063 sample_emit(
2064 const struct lp_build_tgsi_action * action,
2065 struct lp_build_tgsi_context * bld_base,
2066 struct lp_build_emit_data * emit_data)
2067 {
2068 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2069
2070 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2071 emit_data->output);
2072 }
2073
2074 static void
2075 sample_b_emit(
2076 const struct lp_build_tgsi_action * action,
2077 struct lp_build_tgsi_context * bld_base,
2078 struct lp_build_emit_data * emit_data)
2079 {
2080 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2081
2082 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2083 emit_data->output);
2084 }
2085
2086 static void
2087 sample_c_emit(
2088 const struct lp_build_tgsi_action * action,
2089 struct lp_build_tgsi_context * bld_base,
2090 struct lp_build_emit_data * emit_data)
2091 {
2092 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2093 /*
2094     * note that we can ignore that this is a comparison instruction here,
2095     * since the comparison is encoded elsewhere (SHADOW target).
2096 */
2097 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2098 emit_data->output);
2099 }
2100
2101 static void
2102 sample_c_lz_emit(
2103 const struct lp_build_tgsi_action * action,
2104 struct lp_build_tgsi_context * bld_base,
2105 struct lp_build_emit_data * emit_data)
2106 {
2107 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2108
2109 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
2110 emit_data->output);
2111 }
2112
2113 static void
2114 sample_d_emit(
2115 const struct lp_build_tgsi_action * action,
2116 struct lp_build_tgsi_context * bld_base,
2117 struct lp_build_emit_data * emit_data)
2118 {
2119 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2120
2121 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
2122 emit_data->output);
2123 }
2124
2125 static void
2126 sample_l_emit(
2127 const struct lp_build_tgsi_action * action,
2128 struct lp_build_tgsi_context * bld_base,
2129 struct lp_build_emit_data * emit_data)
2130 {
2131 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2132
2133 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
2134 emit_data->output);
2135 }
2136
2137 static void
2138 sviewinfo_emit(
2139 const struct lp_build_tgsi_action * action,
2140 struct lp_build_tgsi_context * bld_base,
2141 struct lp_build_emit_data * emit_data)
2142 {
2143 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2144
2145 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
2146 }
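/*
 * Note that TXQ (above) and the DX10 SVIEWINFO opcode both go through
 * emit_size_query(); the final boolean argument tells it which flavour
 * of size query is being emitted, presumably so the SVIEWINFO result can
 * follow the DX10 conventions.
 */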
2147
2148 static void
2149 cal_emit(
2150 const struct lp_build_tgsi_action * action,
2151 struct lp_build_tgsi_context * bld_base,
2152 struct lp_build_emit_data * emit_data)
2153 {
2154 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2155
2156 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
2157 &bld_base->pc);
2158 }
2159
2160 static void
2161 ret_emit(
2162 const struct lp_build_tgsi_action * action,
2163 struct lp_build_tgsi_context * bld_base,
2164 struct lp_build_emit_data * emit_data)
2165 {
2166 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2167
2168 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
2169 }
2170
2171 static void
2172 brk_emit(
2173 const struct lp_build_tgsi_action * action,
2174 struct lp_build_tgsi_context * bld_base,
2175 struct lp_build_emit_data * emit_data)
2176 {
2177 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2178
2179 lp_exec_break(&bld->exec_mask);
2180 }
2181
2182 static void
2183 if_emit(
2184 const struct lp_build_tgsi_action * action,
2185 struct lp_build_tgsi_context * bld_base,
2186 struct lp_build_emit_data * emit_data)
2187 {
2188 LLVMValueRef tmp;
2189 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2190
2191 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
2192 emit_data->args[0], bld->bld_base.base.zero);
2193 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
2194 }
2195
2196 static void
2197 bgnloop_emit(
2198 const struct lp_build_tgsi_action * action,
2199 struct lp_build_tgsi_context * bld_base,
2200 struct lp_build_emit_data * emit_data)
2201 {
2202 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2203
2204 lp_exec_bgnloop(&bld->exec_mask);
2205 }
2206
2207 static void
2208 bgnsub_emit(
2209 const struct lp_build_tgsi_action * action,
2210 struct lp_build_tgsi_context * bld_base,
2211 struct lp_build_emit_data * emit_data)
2212 {
2213 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2214
2215 lp_exec_mask_bgnsub(&bld->exec_mask);
2216 }
2217
2218 static void
2219 else_emit(
2220 const struct lp_build_tgsi_action * action,
2221 struct lp_build_tgsi_context * bld_base,
2222 struct lp_build_emit_data * emit_data)
2223 {
2224 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2225
2226 lp_exec_mask_cond_invert(&bld->exec_mask);
2227 }
2228
2229 static void
2230 endif_emit(
2231 const struct lp_build_tgsi_action * action,
2232 struct lp_build_tgsi_context * bld_base,
2233 struct lp_build_emit_data * emit_data)
2234 {
2235 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2236
2237 lp_exec_mask_cond_pop(&bld->exec_mask);
2238 }
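/*
 * IF/ELSE/ENDIF implement divergent control flow by masking rather than
 * branching: IF pushes (cond != 0) onto the condition mask stack, ELSE
 * inverts the top of that stack, and ENDIF pops it, so both sides of the
 * conditional are emitted and per-lane writes are suppressed via the
 * execution mask.
 */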
2239
2240 static void
2241 endloop_emit(
2242 const struct lp_build_tgsi_action * action,
2243 struct lp_build_tgsi_context * bld_base,
2244 struct lp_build_emit_data * emit_data)
2245 {
2246 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2247
2248 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
2249 }
2250
2251 static void
2252 endsub_emit(
2253 const struct lp_build_tgsi_action * action,
2254 struct lp_build_tgsi_context * bld_base,
2255 struct lp_build_emit_data * emit_data)
2256 {
2257 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2258
2259 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
2260 }
2261
2262 static void
2263 cont_emit(
2264 const struct lp_build_tgsi_action * action,
2265 struct lp_build_tgsi_context * bld_base,
2266 struct lp_build_emit_data * emit_data)
2267 {
2268 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2269
2270 lp_exec_continue(&bld->exec_mask);
2271 }
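/*
 * Loops use the same masking scheme: BRK and CONT do not branch, they
 * merely mask off the lanes that take them (for the rest of the loop or
 * of the current iteration, respectively), and ENDLOOP keeps iterating
 * as long as any lane is still active.
 */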
2272
2273 /* XXX: Refactor and move it to lp_bld_tgsi_action.c
2274 *
2275 * XXX: What do the comments about xmm registers mean? Maybe they are left over
2276  * from old code, but there is no guarantee that LLVM will use those registers
2277 * for this code.
2278 *
2279 * XXX: There should be no calls to lp_build_emit_fetch in this function. This
2280 * should be handled by the emit_data->fetch_args function. */
2281 static void
2282 nrm_emit(
2283 const struct lp_build_tgsi_action * action,
2284 struct lp_build_tgsi_context * bld_base,
2285 struct lp_build_emit_data * emit_data)
2286 {
2287 LLVMValueRef tmp0, tmp1;
2288 LLVMValueRef tmp4 = NULL;
2289 LLVMValueRef tmp5 = NULL;
2290 LLVMValueRef tmp6 = NULL;
2291 LLVMValueRef tmp7 = NULL;
2292 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2293
2294 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2295
2296 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
2297 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
2298 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
2299 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
2300
2301 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2302
2303 /* xmm4 = src.x */
2304 /* xmm0 = src.x * src.x */
2305 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
2306 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2307 tmp4 = tmp0;
2308 }
2309 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
2310
2311 /* xmm5 = src.y */
2312 /* xmm0 = xmm0 + src.y * src.y */
2313 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
2314 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2315 tmp5 = tmp1;
2316 }
2317 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2318 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2319
2320 /* xmm6 = src.z */
2321 /* xmm0 = xmm0 + src.z * src.z */
2322 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
2323 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2324 tmp6 = tmp1;
2325 }
2326 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2327 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2328
2329 if (dims == 4) {
2330 /* xmm7 = src.w */
2331 /* xmm0 = xmm0 + src.w * src.w */
2332 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
2333 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
2334 tmp7 = tmp1;
2335 }
2336 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2337 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2338 }
2339 /* xmm1 = 1 / sqrt(xmm0) */
2340 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
2341 /* dst.x = xmm1 * src.x */
2342 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2343 emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
2344 }
2345 /* dst.y = xmm1 * src.y */
2346 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2347 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
2348 }
2349
2350 /* dst.z = xmm1 * src.z */
2351 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2352 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
2353 }
2354 /* dst.w = xmm1 * src.w */
2355        if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4) {
2356 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
2357 }
2358 }
2359
2360 /* dst.w = 1.0 */
2361 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
2362 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
2363 }
2364 }
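/*
 * In short, for NRM this computes
 *    dst.xyz = src.xyz * rsqrt(src.x*src.x + src.y*src.y + src.z*src.z)
 *    dst.w   = 1.0
 * while NRM4 also folds src.w into the sum and scales all four channels.
 */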
2365
2366 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
2367 {
2368 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2369 struct gallivm_state * gallivm = bld_base->base.gallivm;
2370
2371 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2372 LLVMValueRef array_size =
2373 lp_build_const_int32(gallivm,
2374 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
2375 bld->temps_array = lp_build_array_alloca(gallivm,
2376 bld_base->base.vec_type, array_size,
2377 "temp_array");
2378 }
2379
2380 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2381 LLVMValueRef array_size =
2382 lp_build_const_int32(gallivm,
2383 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
2384 bld->outputs_array = lp_build_array_alloca(gallivm,
2385 bld_base->base.vec_type, array_size,
2386 "output_array");
2387 }
2388
2389    /* If we have indirect addressing in inputs, we need to copy them into
2390     * our alloca array so that they can be indexed dynamically. */
2391 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
2392 unsigned index, chan;
2393 LLVMTypeRef vec_type = bld_base->base.vec_type;
2394 LLVMValueRef array_size = lp_build_const_int32(gallivm,
2395 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
2396 bld->inputs_array = lp_build_array_alloca(gallivm,
2397 vec_type, array_size,
2398 "input_array");
2399
2400 assert(bld_base->info->num_inputs
2401 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
2402
2403 for (index = 0; index < bld_base->info->num_inputs; ++index) {
2404 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2405 LLVMValueRef lindex =
2406 lp_build_const_int32(gallivm, index * 4 + chan);
2407 LLVMValueRef input_ptr =
2408 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
2409 &lindex, 1, "");
2410 LLVMValueRef value = bld->inputs[index][chan];
2411 if (value)
2412 LLVMBuildStore(gallivm->builder, value, input_ptr);
2413 }
2414 }
2415 }
2416 }
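/*
 * The indirect-file arrays above are flat: register REG, channel CHAN
 * lives at element REG * 4 + CHAN.  For example, with indirectly
 * addressed inputs, IN[2].y is copied into element 2 * 4 + 1 == 9 of
 * inputs_array.
 */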
2417
2418 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
2419 {
2420 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2421
2422 if (0) {
2423 /* for debugging */
2424 emit_dump_temps(bld);
2425 }
2426
2427    /* If we have indirect addressing in outputs, we need to copy our alloca
2428     * array back to the output slots specified by the caller */
2429 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2430 unsigned index, chan;
2431 assert(bld_base->info->num_outputs <=
2432 bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1);
2433 for (index = 0; index < bld_base->info->num_outputs; ++index) {
2434 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2435 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
2436 }
2437 }
2438 }
2439 }
2440
2441 void
2442 lp_build_tgsi_soa(struct gallivm_state *gallivm,
2443 const struct tgsi_token *tokens,
2444 struct lp_type type,
2445 struct lp_build_mask_context *mask,
2446 LLVMValueRef consts_ptr,
2447 const struct lp_bld_tgsi_system_values *system_values,
2448 const LLVMValueRef *pos,
2449 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
2450 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
2451 struct lp_build_sampler_soa *sampler,
2452 const struct tgsi_shader_info *info)
2453 {
2454 struct lp_build_tgsi_soa_context bld;
2455
2456 struct lp_type res_type;
2457
2458 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2459 memset(&res_type, 0, sizeof res_type);
2460 res_type.width = type.width;
2461 res_type.length = type.length;
2462 res_type.sign = 1;
2463
2464 /* Setup build context */
2465 memset(&bld, 0, sizeof bld);
2466 lp_build_context_init(&bld.bld_base.base, gallivm, type);
2467 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2468 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2469 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2470 bld.mask = mask;
2471 bld.pos = pos;
2472 bld.inputs = inputs;
2473 bld.outputs = outputs;
2474 bld.consts_ptr = consts_ptr;
2475 bld.sampler = sampler;
2476 bld.bld_base.info = info;
2477 bld.indirect_files = info->indirect_files;
2478
2479 bld.bld_base.soa = TRUE;
2480 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
2481 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
2482 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
2483 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
2484 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
2485 bld.bld_base.emit_store = emit_store;
2486
2487 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
2488 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
2489
2490 bld.bld_base.emit_prologue = emit_prologue;
2491 bld.bld_base.emit_epilogue = emit_epilogue;
2492
2493 /* Set opcode actions */
2494 lp_set_default_actions_cpu(&bld.bld_base);
2495
2496 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
2497 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
2498 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
2499 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
2500 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
2501 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
2502 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
2503 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
2504 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
2505 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
2506 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
2507 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
2508 bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
2509 bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
2510 bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
2511 bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
2512 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
2513 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
2514 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
2515 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
2516 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
2517 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
2518 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
2519 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
2520 /* DX10 sampling ops */
2521 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
2522 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
2523 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
2524 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
2525 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
2526 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = txf_emit;
2527 bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
2528 bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
2529
2530 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
2531
2532 bld.system_values = *system_values;
2533
2534 lp_build_tgsi_llvm(&bld.bld_base, tokens);
2535
2536 if (0) {
2537 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2538 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2539 debug_printf("11111111111111111111111111111 \n");
2540 tgsi_dump(tokens, 0);
2541 lp_debug_dump_value(function);
2542 debug_printf("2222222222222222222222222222 \n");
2543 }
2544
2545 if (0) {
2546 LLVMModuleRef module = LLVMGetGlobalParent(
2547 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
2548 LLVMDumpModule(module);
2549
2550 }
2551 }
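/*
 * Rough usage sketch (disabled): how a caller might drive this translator.
 * Everything passed in below (shader_tokens, fs_mask, consts, sys_values,
 * pos_args, input_vecs, output_vecs, soa_sampler, shader_info) is a
 * placeholder for state a real caller would already have built; only the
 * call itself mirrors the signature above.
 */
#if 0
{
   struct lp_type fs_type;

   /* 4 x float32 SoA vectors */
   memset(&fs_type, 0, sizeof fs_type);
   fs_type.floating = TRUE;
   fs_type.sign = TRUE;
   fs_type.width = 32;
   fs_type.length = 4;

   lp_build_tgsi_soa(gallivm,
                     shader_tokens,   /* TGSI tokens of the shader */
                     fs_type,
                     fs_mask,         /* struct lp_build_mask_context * */
                     consts,          /* constant buffer pointer */
                     sys_values,      /* struct lp_bld_tgsi_system_values * */
                     pos_args,        /* interpolated position values */
                     input_vecs,      /* [attrib][chan] input values */
                     output_vecs,     /* [attrib][chan] output slots, filled in */
                     soa_sampler,     /* struct lp_build_sampler_soa * */
                     shader_info);    /* tgsi_shader_info for the same tokens */
}
#endif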