gallivm,llvmpipe,draw: Support multiple constant buffers.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_tgsi_action.h"
51 #include "lp_bld_type.h"
52 #include "lp_bld_const.h"
53 #include "lp_bld_arit.h"
54 #include "lp_bld_bitarit.h"
55 #include "lp_bld_gather.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_logic.h"
58 #include "lp_bld_swizzle.h"
59 #include "lp_bld_flow.h"
60 #include "lp_bld_quad.h"
61 #include "lp_bld_tgsi.h"
62 #include "lp_bld_limits.h"
63 #include "lp_bld_debug.h"
64 #include "lp_bld_printf.h"
65 #include "lp_bld_sample.h"
66 #include "lp_bld_struct.h"
67
68
69 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
70 {
71 LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
72 LLVMBuilderRef builder = bld->gallivm->builder;
73
74 mask->bld = bld;
75 mask->has_mask = FALSE;
76 mask->cond_stack_size = 0;
77 mask->loop_stack_size = 0;
78 mask->call_stack_size = 0;
79
80 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
81 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
82 LLVMConstAllOnes(mask->int_vec_type);
83
84 mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
85
86 LLVMBuildStore(
87 builder,
88 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
89 mask->loop_limiter);
90 }
91
92 static void lp_exec_mask_update(struct lp_exec_mask *mask)
93 {
94 LLVMBuilderRef builder = mask->bld->gallivm->builder;
95
96 if (mask->loop_stack_size) {
97 /*for loops we need to update the entire mask at runtime */
98 LLVMValueRef tmp;
99 assert(mask->break_mask);
100 tmp = LLVMBuildAnd(builder,
101 mask->cont_mask,
102 mask->break_mask,
103 "maskcb");
104 mask->exec_mask = LLVMBuildAnd(builder,
105 mask->cond_mask,
106 tmp,
107 "maskfull");
108 } else
109 mask->exec_mask = mask->cond_mask;
110
111 if (mask->call_stack_size) {
112 mask->exec_mask = LLVMBuildAnd(builder,
113 mask->exec_mask,
114 mask->ret_mask,
115 "callmask");
116 }
117
118 mask->has_mask = (mask->cond_stack_size > 0 ||
119 mask->loop_stack_size > 0 ||
120 mask->call_stack_size > 0);
121 }
122
123 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
124 LLVMValueRef val)
125 {
126 LLVMBuilderRef builder = mask->bld->gallivm->builder;
127
128 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
129 if (mask->cond_stack_size == 0) {
130 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
131 }
132 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
133 assert(LLVMTypeOf(val) == mask->int_vec_type);
134 mask->cond_mask = LLVMBuildAnd(builder,
135 mask->cond_mask,
136 val,
137 "");
138 lp_exec_mask_update(mask);
139 }
140
141 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
142 {
143 LLVMBuilderRef builder = mask->bld->gallivm->builder;
144 LLVMValueRef prev_mask;
145 LLVMValueRef inv_mask;
146
147 assert(mask->cond_stack_size);
148 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
149 if (mask->cond_stack_size == 1) {
150 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
151 }
152
153 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
154
155 mask->cond_mask = LLVMBuildAnd(builder,
156 inv_mask,
157 prev_mask, "");
158 lp_exec_mask_update(mask);
159 }
160
161 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
162 {
163 assert(mask->cond_stack_size);
164 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
165 lp_exec_mask_update(mask);
166 }
167
168 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
169 {
170 LLVMBuilderRef builder = mask->bld->gallivm->builder;
171
172 if (mask->loop_stack_size == 0) {
173 assert(mask->loop_block == NULL);
174 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
175 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
176 assert(mask->break_var == NULL);
177 }
178
179 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
180
181 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
182 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
183 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
184 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
185 ++mask->loop_stack_size;
186
187 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
188 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
189
190 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
191
192 LLVMBuildBr(builder, mask->loop_block);
193 LLVMPositionBuilderAtEnd(builder, mask->loop_block);
194
195 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
196
197 lp_exec_mask_update(mask);
198 }
199
200 static void lp_exec_break(struct lp_exec_mask *mask)
201 {
202 LLVMBuilderRef builder = mask->bld->gallivm->builder;
203 LLVMValueRef exec_mask = LLVMBuildNot(builder,
204 mask->exec_mask,
205 "break");
206
207 mask->break_mask = LLVMBuildAnd(builder,
208 mask->break_mask,
209 exec_mask, "break_full");
210
211 lp_exec_mask_update(mask);
212 }
213
214 static void lp_exec_continue(struct lp_exec_mask *mask)
215 {
216 LLVMBuilderRef builder = mask->bld->gallivm->builder;
217 LLVMValueRef exec_mask = LLVMBuildNot(builder,
218 mask->exec_mask,
219 "");
220
221 mask->cont_mask = LLVMBuildAnd(builder,
222 mask->cont_mask,
223 exec_mask, "");
224
225 lp_exec_mask_update(mask);
226 }
227
228
229 static void lp_exec_endloop(struct gallivm_state *gallivm,
230 struct lp_exec_mask *mask)
231 {
232 LLVMBuilderRef builder = mask->bld->gallivm->builder;
233 LLVMBasicBlockRef endloop;
234 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
235 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
236 mask->bld->type.width *
237 mask->bld->type.length);
238 LLVMValueRef i1cond, i2cond, icond, limiter;
239
240 assert(mask->break_mask);
241
242 /*
243 * Restore the cont_mask, but don't pop
244 */
245 assert(mask->loop_stack_size);
246 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
247 lp_exec_mask_update(mask);
248
249 /*
250 * Unlike the continue mask, the break_mask must be preserved across loop
251 * iterations
252 */
253 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
254
255 /* Decrement the loop limiter */
256 limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
257
258 limiter = LLVMBuildSub(
259 builder,
260 limiter,
261 LLVMConstInt(int_type, 1, false),
262 "");
263
264 LLVMBuildStore(builder, limiter, mask->loop_limiter);
265
266 /* i1cond = (mask != 0) */
267 i1cond = LLVMBuildICmp(
268 builder,
269 LLVMIntNE,
270 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
271 LLVMConstNull(reg_type), "");
272
273 /* i2cond = (looplimiter > 0) */
274 i2cond = LLVMBuildICmp(
275 builder,
276 LLVMIntSGT,
277 limiter,
278 LLVMConstNull(int_type), "");
279
280 /* if( i1cond && i2cond ) */
281 icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
282
283 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
284
285 LLVMBuildCondBr(builder,
286 icond, mask->loop_block, endloop);
287
288 LLVMPositionBuilderAtEnd(builder, endloop);
289
290 assert(mask->loop_stack_size);
291 --mask->loop_stack_size;
292 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
293 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
294 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
295 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
296
297 lp_exec_mask_update(mask);
298 }
299
300 /* stores val into an address pointed to by dst.
301 * mask->exec_mask is used to figure out which bits of val
302 * should be stored into the address
303 * (0 means don't store this bit, 1 means do store).
304 */
305 static void lp_exec_mask_store(struct lp_exec_mask *mask,
306 struct lp_build_context *bld_store,
307 LLVMValueRef pred,
308 LLVMValueRef val,
309 LLVMValueRef dst)
310 {
311 LLVMBuilderRef builder = mask->bld->gallivm->builder;
312
313 /* Mix the predicate and execution mask */
314 if (mask->has_mask) {
315 if (pred) {
316 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
317 } else {
318 pred = mask->exec_mask;
319 }
320 }
321
322 if (pred) {
323 LLVMValueRef real_val, dst_val;
324
325 dst_val = LLVMBuildLoad(builder, dst, "");
326 real_val = lp_build_select(bld_store,
327 pred,
328 val, dst_val);
329
330 LLVMBuildStore(builder, real_val, dst);
331 } else
332 LLVMBuildStore(builder, val, dst);
333 }
334
335 static void lp_exec_mask_call(struct lp_exec_mask *mask,
336 int func,
337 int *pc)
338 {
339 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
340 mask->call_stack[mask->call_stack_size].pc = *pc;
341 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
342 mask->call_stack_size++;
343 *pc = func;
344 }
345
346 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
347 {
348 LLVMBuilderRef builder = mask->bld->gallivm->builder;
349 LLVMValueRef exec_mask;
350
351 if (mask->call_stack_size == 0) {
352 /* returning from main() */
353 *pc = -1;
354 return;
355 }
356 exec_mask = LLVMBuildNot(builder,
357 mask->exec_mask,
358 "ret");
359
360 mask->ret_mask = LLVMBuildAnd(builder,
361 mask->ret_mask,
362 exec_mask, "ret_full");
363
364 lp_exec_mask_update(mask);
365 }
366
/**
 * Subroutine entry hook.  Intentionally a no-op: the mask state is not
 * modified here.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
370
371 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
372 {
373 assert(mask->call_stack_size);
374 mask->call_stack_size--;
375 *pc = mask->call_stack[mask->call_stack_size].pc;
376 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
377 lp_exec_mask_update(mask);
378 }
379
380
381 /**
382 * Return pointer to a temporary register channel (src or dest).
383 * Note that indirect addressing cannot be handled here.
384 * \param index which temporary register
385 * \param chan which channel of the temp register.
386 */
387 LLVMValueRef
388 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
389 unsigned index,
390 unsigned chan)
391 {
392 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
393 assert(chan < 4);
394 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
395 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
396 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
397 }
398 else {
399 return bld->temps[index][chan];
400 }
401 }
402
403 /**
404 * Return pointer to a output register channel (src or dest).
405 * Note that indirect addressing cannot be handled here.
406 * \param index which output register
407 * \param chan which channel of the output register.
408 */
409 LLVMValueRef
410 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
411 unsigned index,
412 unsigned chan)
413 {
414 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
415 assert(chan < 4);
416 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
417 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
418 index * 4 + chan);
419 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
420 }
421 else {
422 return bld->outputs[index][chan];
423 }
424 }
425
426 /**
427 * Gather vector.
428 * XXX the lp_build_gather() function should be capable of doing this
429 * with a little work.
430 */
431 static LLVMValueRef
432 build_gather(struct lp_build_context *bld,
433 LLVMValueRef base_ptr,
434 LLVMValueRef indexes)
435 {
436 LLVMBuilderRef builder = bld->gallivm->builder;
437 LLVMValueRef res = bld->undef;
438 unsigned i;
439
440 /*
441 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
442 */
443 for (i = 0; i < bld->type.length; i++) {
444 LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
445 LLVMValueRef index = LLVMBuildExtractElement(builder,
446 indexes, ii, "");
447 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
448 &index, 1, "gather_ptr");
449 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
450
451 res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
452 }
453
454 return res;
455 }
456
457
458 /**
459 * Scatter/store vector.
460 */
461 static void
462 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
463 LLVMValueRef base_ptr,
464 LLVMValueRef indexes,
465 LLVMValueRef values,
466 struct lp_exec_mask *mask,
467 LLVMValueRef pred)
468 {
469 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
470 LLVMBuilderRef builder = gallivm->builder;
471 unsigned i;
472
473 /* Mix the predicate and execution mask */
474 if (mask->has_mask) {
475 if (pred) {
476 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
477 }
478 else {
479 pred = mask->exec_mask;
480 }
481 }
482
483 /*
484 * Loop over elements of index_vec, store scalar value.
485 */
486 for (i = 0; i < bld->bld_base.base.type.length; i++) {
487 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
488 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
489 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
490 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
491 LLVMValueRef scalar_pred = pred ?
492 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
493
494 if (0)
495 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
496 ii, val, index, scalar_ptr);
497
498 if (scalar_pred) {
499 LLVMValueRef real_val, dst_val;
500 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
501 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
502 LLVMBuildStore(builder, real_val, scalar_ptr);
503 }
504 else {
505 LLVMBuildStore(builder, val, scalar_ptr);
506 }
507 }
508 }
509
510
511 /**
512 * Read the current value of the ADDR register, convert the floats to
513 * ints, add the base index and return the vector of offsets.
514 * The offsets will be used to index into the constant buffer or
515 * temporary register file.
516 */
517 static LLVMValueRef
518 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
519 unsigned reg_file, unsigned reg_index,
520 const struct tgsi_src_register *indirect_reg)
521 {
522 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
523 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
524 /* always use X component of address register */
525 unsigned swizzle = indirect_reg->SwizzleX;
526 LLVMValueRef base;
527 LLVMValueRef rel;
528 LLVMValueRef max_index;
529 LLVMValueRef index;
530
531 assert(bld->indirect_files & (1 << reg_file));
532
533 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
534
535 assert(swizzle < 4);
536 rel = LLVMBuildLoad(builder,
537 bld->addr[indirect_reg->Index][swizzle],
538 "load addr reg");
539
540 index = lp_build_add(uint_bld, base, rel);
541
542 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
543 uint_bld->type,
544 bld->bld_base.info->file_max[reg_file]);
545
546 assert(!uint_bld->type.sign);
547 index = lp_build_min(uint_bld, index, max_index);
548
549 return index;
550 }
551
552 static struct lp_build_context *
553 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
554 enum tgsi_opcode_type stype)
555 {
556 struct lp_build_context *bld_fetch;
557
558 switch (stype) {
559 case TGSI_TYPE_FLOAT:
560 case TGSI_TYPE_UNTYPED:
561 bld_fetch = &bld_base->base;
562 break;
563 case TGSI_TYPE_UNSIGNED:
564 bld_fetch = &bld_base->uint_bld;
565 break;
566 case TGSI_TYPE_SIGNED:
567 bld_fetch = &bld_base->int_bld;
568 break;
569 case TGSI_TYPE_VOID:
570 case TGSI_TYPE_DOUBLE:
571 default:
572 assert(0);
573 bld_fetch = NULL;
574 break;
575 }
576 return bld_fetch;
577 }
578
579 static LLVMValueRef
580 emit_fetch_constant(
581 struct lp_build_tgsi_context * bld_base,
582 const struct tgsi_full_src_register * reg,
583 enum tgsi_opcode_type stype,
584 unsigned swizzle)
585 {
586 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
587 struct gallivm_state *gallivm = bld_base->base.gallivm;
588 LLVMBuilderRef builder = gallivm->builder;
589 struct lp_build_context *uint_bld = &bld_base->uint_bld;
590 LLVMValueRef indirect_index = NULL;
591 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
592 unsigned dimension = 0;
593 LLVMValueRef dimension_index;
594 LLVMValueRef consts_ptr;
595
596 /* XXX: Handle fetching xyzw components as a vector */
597 assert(swizzle != ~0);
598
599 if (reg->Register.Dimension) {
600 assert(!reg->Dimension.Indirect);
601 dimension = reg->Dimension.Index;
602 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
603 }
604
605 dimension_index = lp_build_const_int32(gallivm, dimension);
606 consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);
607
608 if (reg->Register.Indirect) {
609 indirect_index = get_indirect_index(bld,
610 reg->Register.File,
611 reg->Register.Index,
612 &reg->Indirect);
613 }
614
615 if (reg->Register.Indirect) {
616 LLVMValueRef swizzle_vec =
617 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
618 LLVMValueRef index_vec; /* index into the const buffer */
619
620 /* index_vec = indirect_index * 4 + swizzle */
621 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
622 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
623
624 /* Gather values from the constant buffer */
625 return build_gather(bld_fetch, consts_ptr, index_vec);
626 }
627 else {
628 LLVMValueRef index; /* index into the const buffer */
629 LLVMValueRef scalar, scalar_ptr;
630
631 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
632
633 scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
634 &index, 1, "");
635
636 if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
637 LLVMTypeRef ivtype = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0);
638 LLVMValueRef temp_ptr;
639 temp_ptr = LLVMBuildBitCast(builder, scalar_ptr, ivtype, "");
640 scalar = LLVMBuildLoad(builder, temp_ptr, "");
641 } else
642 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
643
644 return lp_build_broadcast_scalar(bld_fetch, scalar);
645 }
646 }
647
648 static LLVMValueRef
649 emit_fetch_immediate(
650 struct lp_build_tgsi_context * bld_base,
651 const struct tgsi_full_src_register * reg,
652 enum tgsi_opcode_type stype,
653 unsigned swizzle)
654 {
655 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
656 LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
657 assert(res);
658
659 if (stype == TGSI_TYPE_UNSIGNED) {
660 res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
661 } else if (stype == TGSI_TYPE_SIGNED) {
662 res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
663 }
664 return res;
665 }
666
667 static LLVMValueRef
668 emit_fetch_input(
669 struct lp_build_tgsi_context * bld_base,
670 const struct tgsi_full_src_register * reg,
671 enum tgsi_opcode_type stype,
672 unsigned swizzle)
673 {
674 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
675 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
676 LLVMBuilderRef builder = gallivm->builder;
677 struct lp_build_context *uint_bld = &bld_base->uint_bld;
678 LLVMValueRef indirect_index = NULL;
679 LLVMValueRef res;
680
681 if (reg->Register.Indirect) {
682 indirect_index = get_indirect_index(bld,
683 reg->Register.File,
684 reg->Register.Index,
685 &reg->Indirect);
686 }
687
688 if (reg->Register.Indirect) {
689 LLVMValueRef swizzle_vec =
690 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
691 LLVMValueRef length_vec =
692 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
693 LLVMValueRef index_vec; /* index into the const buffer */
694 LLVMValueRef inputs_array;
695 LLVMTypeRef float4_ptr_type;
696
697 /* index_vec = (indirect_index * 4 + swizzle) * length */
698 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
699 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
700 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
701
702 /* cast inputs_array pointer to float* */
703 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
704 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
705 float4_ptr_type, "");
706
707 /* Gather values from the temporary register array */
708 res = build_gather(&bld_base->base, inputs_array, index_vec);
709 } else {
710 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
711 LLVMValueRef lindex = lp_build_const_int32(gallivm,
712 reg->Register.Index * 4 + swizzle);
713 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
714 bld->inputs_array, &lindex, 1, "");
715 res = LLVMBuildLoad(builder, input_ptr, "");
716 }
717 else {
718 res = bld->inputs[reg->Register.Index][swizzle];
719 }
720 }
721
722 assert(res);
723
724 if (stype == TGSI_TYPE_UNSIGNED) {
725 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
726 } else if (stype == TGSI_TYPE_SIGNED) {
727 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
728 }
729
730 return res;
731 }
732
733 static LLVMValueRef
734 emit_fetch_temporary(
735 struct lp_build_tgsi_context * bld_base,
736 const struct tgsi_full_src_register * reg,
737 enum tgsi_opcode_type stype,
738 unsigned swizzle)
739 {
740 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
741 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
742 LLVMBuilderRef builder = gallivm->builder;
743 struct lp_build_context *uint_bld = &bld_base->uint_bld;
744 LLVMValueRef indirect_index = NULL;
745 LLVMValueRef res;
746
747 if (reg->Register.Indirect) {
748 indirect_index = get_indirect_index(bld,
749 reg->Register.File,
750 reg->Register.Index,
751 &reg->Indirect);
752 }
753
754 if (reg->Register.Indirect) {
755 LLVMValueRef swizzle_vec =
756 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
757 LLVMValueRef length_vec =
758 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
759 bld->bld_base.base.type.length);
760 LLVMValueRef index_vec; /* index into the const buffer */
761 LLVMValueRef temps_array;
762 LLVMTypeRef float4_ptr_type;
763
764 /* index_vec = (indirect_index * 4 + swizzle) * length */
765 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
766 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
767 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
768
769 /* cast temps_array pointer to float* */
770 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
771 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
772 float4_ptr_type, "");
773
774 /* Gather values from the temporary register array */
775 res = build_gather(&bld_base->base, temps_array, index_vec);
776 }
777 else {
778 LLVMValueRef temp_ptr;
779 if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
780 LLVMTypeRef itype = LLVMPointerType(bld->bld_base.int_bld.vec_type, 0);
781 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
782 swizzle);
783 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, itype, "");
784 } else
785 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
786 res = LLVMBuildLoad(builder, temp_ptr, "");
787 if (!res)
788 return bld->bld_base.base.undef;
789 }
790
791 return res;
792 }
793
794 static LLVMValueRef
795 emit_fetch_system_value(
796 struct lp_build_tgsi_context * bld_base,
797 const struct tgsi_full_src_register * reg,
798 enum tgsi_opcode_type stype,
799 unsigned swizzle)
800 {
801 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
802 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
803 const struct tgsi_shader_info *info = bld->bld_base.info;
804 LLVMBuilderRef builder = gallivm->builder;
805 LLVMValueRef res;
806 enum tgsi_opcode_type atype; // Actual type of the value
807
808 assert(!reg->Register.Indirect);
809
810 switch (info->system_value_semantic_name[reg->Register.Index]) {
811 case TGSI_SEMANTIC_INSTANCEID:
812 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
813 atype = TGSI_TYPE_UNSIGNED;
814 break;
815
816 case TGSI_SEMANTIC_VERTEXID:
817 res = bld->system_values.vertex_id;
818 atype = TGSI_TYPE_UNSIGNED;
819 break;
820
821 default:
822 assert(!"unexpected semantic in emit_fetch_system_value");
823 res = bld_base->base.zero;
824 atype = TGSI_TYPE_FLOAT;
825 break;
826 }
827
828 if (atype != stype) {
829 if (stype == TGSI_TYPE_FLOAT) {
830 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
831 } else if (stype == TGSI_TYPE_UNSIGNED) {
832 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
833 } else if (stype == TGSI_TYPE_SIGNED) {
834 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
835 }
836 }
837
838 return res;
839 }
840
841 /**
842 * Register fetch with derivatives.
843 */
844 static void
845 emit_fetch_deriv(
846 struct lp_build_tgsi_soa_context *bld,
847 LLVMValueRef src,
848 LLVMValueRef *res,
849 LLVMValueRef *ddx,
850 LLVMValueRef *ddy)
851 {
852 if(res)
853 *res = src;
854
855 /* TODO: use interpolation coeffs for inputs */
856
857 if(ddx)
858 *ddx = lp_build_ddx(&bld->bld_base.base, src);
859
860 if(ddy)
861 *ddy = lp_build_ddy(&bld->bld_base.base, src);
862 }
863
864
865 /**
866 * Predicate.
867 */
868 static void
869 emit_fetch_predicate(
870 struct lp_build_tgsi_soa_context *bld,
871 const struct tgsi_full_instruction *inst,
872 LLVMValueRef *pred)
873 {
874 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
875 unsigned index;
876 unsigned char swizzles[4];
877 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
878 LLVMValueRef value;
879 unsigned chan;
880
881 if (!inst->Instruction.Predicate) {
882 TGSI_FOR_EACH_CHANNEL( chan ) {
883 pred[chan] = NULL;
884 }
885 return;
886 }
887
888 swizzles[0] = inst->Predicate.SwizzleX;
889 swizzles[1] = inst->Predicate.SwizzleY;
890 swizzles[2] = inst->Predicate.SwizzleZ;
891 swizzles[3] = inst->Predicate.SwizzleW;
892
893 index = inst->Predicate.Index;
894 assert(index < LP_MAX_TGSI_PREDS);
895
896 TGSI_FOR_EACH_CHANNEL( chan ) {
897 unsigned swizzle = swizzles[chan];
898
899 /*
900 * Only fetch the predicate register channels that are actually listed
901 * in the swizzles
902 */
903 if (!unswizzled[swizzle]) {
904 value = LLVMBuildLoad(builder,
905 bld->preds[index][swizzle], "");
906
907 /*
908 * Convert the value to an integer mask.
909 *
910 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
911 * is needlessly causing two comparisons due to storing the intermediate
912 * result as float vector instead of an integer mask vector.
913 */
914 value = lp_build_compare(bld->bld_base.base.gallivm,
915 bld->bld_base.base.type,
916 PIPE_FUNC_NOTEQUAL,
917 value,
918 bld->bld_base.base.zero);
919 if (inst->Predicate.Negate) {
920 value = LLVMBuildNot(builder, value, "");
921 }
922
923 unswizzled[swizzle] = value;
924 } else {
925 value = unswizzled[swizzle];
926 }
927
928 pred[chan] = value;
929 }
930 }
931
932 /**
933 * Register store.
934 */
935 static void
936 emit_store_chan(
937 struct lp_build_tgsi_context *bld_base,
938 const struct tgsi_full_instruction *inst,
939 unsigned index,
940 unsigned chan_index,
941 LLVMValueRef pred,
942 LLVMValueRef value)
943 {
944 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
945 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
946 LLVMBuilderRef builder = gallivm->builder;
947 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
948 struct lp_build_context *uint_bld = &bld_base->uint_bld;
949 LLVMValueRef indirect_index = NULL;
950 struct lp_build_context *bld_store;
951 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
952
953 switch (dtype) {
954 default:
955 case TGSI_TYPE_FLOAT:
956 case TGSI_TYPE_UNTYPED:
957 bld_store = &bld_base->base;
958 break;
959 case TGSI_TYPE_UNSIGNED:
960 bld_store = &bld_base->uint_bld;
961 break;
962 case TGSI_TYPE_SIGNED:
963 bld_store = &bld_base->int_bld;
964 break;
965 case TGSI_TYPE_DOUBLE:
966 case TGSI_TYPE_VOID:
967 assert(0);
968 bld_store = NULL;
969 break;
970 }
971
972 switch( inst->Instruction.Saturate ) {
973 case TGSI_SAT_NONE:
974 break;
975
976 case TGSI_SAT_ZERO_ONE:
977 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
978 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
979 break;
980
981 case TGSI_SAT_MINUS_PLUS_ONE:
982 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
983 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
984 break;
985
986 default:
987 assert(0);
988 }
989
990 if (reg->Register.Indirect) {
991 indirect_index = get_indirect_index(bld,
992 reg->Register.File,
993 reg->Register.Index,
994 &reg->Indirect);
995 } else {
996 assert(reg->Register.Index <=
997 bld->bld_base.info->file_max[reg->Register.File]);
998 }
999
1000 switch( reg->Register.File ) {
1001 case TGSI_FILE_OUTPUT:
1002 if (reg->Register.Indirect) {
1003 LLVMValueRef chan_vec =
1004 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1005 LLVMValueRef length_vec =
1006 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
1007 LLVMValueRef index_vec; /* indexes into the temp registers */
1008 LLVMValueRef outputs_array;
1009 LLVMValueRef pixel_offsets;
1010 LLVMTypeRef float_ptr_type;
1011 int i;
1012
1013 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1014 pixel_offsets = uint_bld->undef;
1015 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1016 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1017 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1018 ii, ii, "");
1019 }
1020
1021 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1022 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1023 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1024 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1025 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1026
1027 float_ptr_type =
1028 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1029 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
1030 float_ptr_type, "");
1031
1032 /* Scatter store values into temp registers */
1033 emit_mask_scatter(bld, outputs_array, index_vec, value,
1034 &bld->exec_mask, pred);
1035 }
1036 else {
1037 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1038 chan_index);
1039 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
1040 }
1041 break;
1042
1043 case TGSI_FILE_TEMPORARY:
1044 if (reg->Register.Indirect) {
1045 LLVMValueRef chan_vec =
1046 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1047 LLVMValueRef length_vec =
1048 lp_build_const_int_vec(gallivm, uint_bld->type,
1049 bld->bld_base.base.type.length);
1050 LLVMValueRef index_vec; /* indexes into the temp registers */
1051 LLVMValueRef temps_array;
1052 LLVMValueRef pixel_offsets;
1053 LLVMTypeRef float_ptr_type;
1054 int i;
1055
1056 /* build pixel offset vector: {0, 1, 2, 3, ...} */
1057 pixel_offsets = uint_bld->undef;
1058 for (i = 0; i < bld->bld_base.base.type.length; i++) {
1059 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1060 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1061 ii, ii, "");
1062 }
1063
1064 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1065 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1066 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1067 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1068 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1069
1070 float_ptr_type =
1071 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1072 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1073 float_ptr_type, "");
1074
1075 /* Scatter store values into temp registers */
1076 emit_mask_scatter(bld, temps_array, index_vec, value,
1077 &bld->exec_mask, pred);
1078 }
1079 else {
1080 LLVMValueRef temp_ptr;
1081
1082 switch (dtype) {
1083 case TGSI_TYPE_UNSIGNED:
1084 case TGSI_TYPE_SIGNED: {
1085 LLVMTypeRef itype = bld_base->int_bld.vec_type;
1086 LLVMTypeRef ivtype = LLVMPointerType(itype, 0);
1087 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1088 chan_index);
1089 LLVMValueRef temp_value_ptr;
1090
1091 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, "");
1092 temp_value_ptr = LLVMBuildBitCast(builder, value, itype, "");
1093 value = temp_value_ptr;
1094 break;
1095 }
1096 default:
1097 case TGSI_TYPE_FLOAT:
1098 case TGSI_TYPE_UNTYPED:
1099 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1100 chan_index);
1101 break;
1102 }
1103
1104 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr);
1105 }
1106 break;
1107
1108 case TGSI_FILE_ADDRESS:
1109 assert(dtype == TGSI_TYPE_SIGNED);
1110 assert(LLVMTypeOf(value) == bld_base->base.int_vec_type);
1111 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1112 bld->addr[reg->Register.Index][chan_index]);
1113 break;
1114
1115 case TGSI_FILE_PREDICATE:
1116 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1117 bld->preds[reg->Register.Index][chan_index]);
1118 break;
1119
1120 default:
1121 assert( 0 );
1122 }
1123 }
1124
1125 static void
1126 emit_store(
1127 struct lp_build_tgsi_context * bld_base,
1128 const struct tgsi_full_instruction * inst,
1129 const struct tgsi_opcode_info * info,
1130 LLVMValueRef dst[4])
1131
1132 {
1133 unsigned chan_index;
1134 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1135
1136 if(info->num_dst) {
1137 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1138
1139 emit_fetch_predicate( bld, inst, pred );
1140
1141 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1142 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1143 }
1144 }
1145 }
1146
1147 /**
1148 * High-level instruction translators.
1149 */
1150
1151 static void
1152 emit_tex( struct lp_build_tgsi_soa_context *bld,
1153 const struct tgsi_full_instruction *inst,
1154 enum lp_build_tex_modifier modifier,
1155 LLVMValueRef *texel)
1156 {
1157 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1158 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1159 unsigned unit;
1160 LLVMValueRef lod_bias, explicit_lod;
1161 LLVMValueRef oow = NULL;
1162 LLVMValueRef coords[4];
1163 LLVMValueRef offsets[3] = { NULL };
1164 struct lp_derivatives derivs;
1165 unsigned num_coords;
1166 unsigned dims;
1167 unsigned i;
1168
1169 if (!bld->sampler) {
1170 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1171 for (i = 0; i < 4; i++) {
1172 texel[i] = bld->bld_base.base.undef;
1173 }
1174 return;
1175 }
1176
1177 derivs.ddx_ddy[0] = bld->bld_base.base.undef;
1178 derivs.ddx_ddy[1] = bld->bld_base.base.undef;
1179
1180 switch (inst->Texture.Texture) {
1181 case TGSI_TEXTURE_1D:
1182 num_coords = 1;
1183 dims = 1;
1184 break;
1185 case TGSI_TEXTURE_1D_ARRAY:
1186 num_coords = 2;
1187 dims = 1;
1188 break;
1189 case TGSI_TEXTURE_2D:
1190 case TGSI_TEXTURE_RECT:
1191 num_coords = 2;
1192 dims = 2;
1193 break;
1194 case TGSI_TEXTURE_SHADOW1D:
1195 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1196 num_coords = 3;
1197 dims = 1;
1198 break;
1199 case TGSI_TEXTURE_SHADOW2D:
1200 case TGSI_TEXTURE_SHADOWRECT:
1201 case TGSI_TEXTURE_2D_ARRAY:
1202 case TGSI_TEXTURE_CUBE:
1203 num_coords = 3;
1204 dims = 2;
1205 break;
1206 case TGSI_TEXTURE_3D:
1207 num_coords = 3;
1208 dims = 3;
1209 break;
1210 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1211 case TGSI_TEXTURE_SHADOWCUBE:
1212 num_coords = 4;
1213 dims = 2;
1214 break;
1215 default:
1216 assert(0);
1217 return;
1218 }
1219
1220 /* Note lod and especially projected are illegal in a LOT of cases */
1221 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1222 assert(num_coords < 4);
1223 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1224 explicit_lod = NULL;
1225 }
1226 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1227 assert(num_coords < 4);
1228 lod_bias = NULL;
1229 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1230 }
1231 else {
1232 lod_bias = NULL;
1233 explicit_lod = NULL;
1234 }
1235
1236 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1237 assert(num_coords < 4);
1238 oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1239 oow = lp_build_rcp(&bld->bld_base.base, oow);
1240 }
1241
1242 for (i = 0; i < num_coords; i++) {
1243 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1244 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1245 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1246 }
1247 for (i = num_coords; i < 4; i++) {
1248 coords[i] = bld->bld_base.base.undef;
1249 }
1250
1251 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1252 LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
1253 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
1254 LLVMValueRef ddxdyonec[3];
1255 unsigned length = bld->bld_base.base.type.length;
1256 unsigned num_quads = length / 4;
1257 unsigned dim;
1258 unsigned quad;
1259
1260 for (dim = 0; dim < dims; ++dim) {
1261 LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
1262 LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
1263 for (quad = 0; quad < num_quads; ++quad) {
1264 unsigned s1 = 4*quad;
1265 unsigned s2 = 4*quad + length;
1266 shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
1267 shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
1268 shuffles[4*quad + 2] = i32undef;
1269 shuffles[4*quad + 3] = i32undef;
1270 }
1271 ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
1272 LLVMConstVector(shuffles, length), "");
1273 }
1274 if (dims == 1) {
1275 derivs.ddx_ddy[0] = ddxdyonec[0];
1276 }
1277 else if (dims >= 2) {
1278 for (quad = 0; quad < num_quads; ++quad) {
1279 unsigned s1 = 4*quad;
1280 unsigned s2 = 4*quad + length;
1281 shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
1282 shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
1283 shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
1284 shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
1285 }
1286 derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1],
1287 LLVMConstVector(shuffles, length), "");
1288 if (dims == 3) {
1289 derivs.ddx_ddy[1] = ddxdyonec[2];
1290 }
1291 }
1292 unit = inst->Src[3].Register.Index;
1293 } else {
1294 if (dims == 1) {
1295 derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
1296 }
1297 else if (dims >= 2) {
1298 derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
1299 coords[0], coords[1]);
1300 if (dims == 3) {
1301 derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
1302 }
1303 }
1304 unit = inst->Src[1].Register.Index;
1305 }
1306
1307 /* some advanced gather instructions (txgo) would require 4 offsets */
1308 if (inst->Texture.NumOffsets == 1) {
1309 unsigned dim;
1310 for (dim = 0; dim < dims; dim++) {
1311 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1312 }
1313 }
1314
1315 bld->sampler->emit_fetch_texel(bld->sampler,
1316 bld->bld_base.base.gallivm,
1317 bld->bld_base.base.type,
1318 FALSE,
1319 unit, coords,
1320 offsets,
1321 &derivs,
1322 lod_bias, explicit_lod,
1323 texel);
1324 }
1325
1326 static void
1327 emit_txf( struct lp_build_tgsi_soa_context *bld,
1328 const struct tgsi_full_instruction *inst,
1329 LLVMValueRef *texel)
1330 {
1331 unsigned unit;
1332 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
1333 LLVMValueRef explicit_lod = NULL;
1334 LLVMValueRef coords[3];
1335 LLVMValueRef offsets[3] = { NULL };
1336 struct lp_derivatives derivs;
1337 unsigned num_coords;
1338 unsigned dims;
1339 unsigned i;
1340
1341 if (!bld->sampler) {
1342 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1343 for (i = 0; i < 4; i++) {
1344 texel[i] = coord_undef;
1345 }
1346 return;
1347 }
1348
1349 derivs.ddx_ddy[0] = coord_undef;
1350 derivs.ddx_ddy[1] = coord_undef;
1351
1352 switch (inst->Texture.Texture) {
1353 case TGSI_TEXTURE_1D:
1354 case TGSI_TEXTURE_BUFFER:
1355 num_coords = 1;
1356 dims = 1;
1357 break;
1358 case TGSI_TEXTURE_1D_ARRAY:
1359 num_coords = 2;
1360 dims = 1;
1361 break;
1362 case TGSI_TEXTURE_2D:
1363 case TGSI_TEXTURE_RECT:
1364 num_coords = 2;
1365 dims = 2;
1366 break;
1367 case TGSI_TEXTURE_2D_ARRAY:
1368 num_coords = 3;
1369 dims = 2;
1370 break;
1371 case TGSI_TEXTURE_3D:
1372 num_coords = 3;
1373 dims = 3;
1374 break;
1375 default:
1376 assert(0);
1377 return;
1378 }
1379
1380 /* always have lod except for buffers ? */
1381 if (inst->Texture.Texture != TGSI_TEXTURE_BUFFER) {
1382 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1383 }
1384
1385 for (i = 0; i < num_coords; i++) {
1386 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1387 }
1388 for (i = num_coords; i < 3; i++) {
1389 coords[i] = coord_undef;
1390 }
1391
1392 unit = inst->Src[1].Register.Index;
1393
1394 if (inst->Texture.NumOffsets == 1) {
1395 unsigned dim;
1396 for (dim = 0; dim < dims; dim++) {
1397 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
1398 }
1399 }
1400
1401 bld->sampler->emit_fetch_texel(bld->sampler,
1402 bld->bld_base.base.gallivm,
1403 bld->bld_base.base.type,
1404 TRUE,
1405 unit, coords,
1406 offsets,
1407 &derivs,
1408 NULL, explicit_lod,
1409 texel);
1410 }
1411
1412 static void
1413 emit_txq( struct lp_build_tgsi_soa_context *bld,
1414 const struct tgsi_full_instruction *inst,
1415 LLVMValueRef *sizes_out)
1416 {
1417 LLVMValueRef explicit_lod;
1418 unsigned num_coords, has_lod;
1419 unsigned i;
1420
1421 switch (inst->Texture.Texture) {
1422 case TGSI_TEXTURE_1D:
1423 case TGSI_TEXTURE_SHADOW1D:
1424 num_coords = 1;
1425 has_lod = 1;
1426 break;
1427 case TGSI_TEXTURE_2D:
1428 case TGSI_TEXTURE_SHADOW2D:
1429 case TGSI_TEXTURE_CUBE:
1430 case TGSI_TEXTURE_SHADOWCUBE:
1431 case TGSI_TEXTURE_1D_ARRAY:
1432 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1433 num_coords = 2;
1434 has_lod = 1;
1435 break;
1436 case TGSI_TEXTURE_3D:
1437 // case TGSI_TEXTURE_CUBE_ARRAY:
1438 // case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1439 case TGSI_TEXTURE_2D_ARRAY:
1440 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1441 num_coords = 3;
1442 has_lod = 1;
1443 break;
1444
1445 case TGSI_TEXTURE_BUFFER:
1446 num_coords = 1;
1447 has_lod = 0;
1448 break;
1449
1450 case TGSI_TEXTURE_RECT:
1451 case TGSI_TEXTURE_SHADOWRECT:
1452 // case TGSI_TEXTURE_2D_MS:
1453 num_coords = 2;
1454 has_lod = 0;
1455 break;
1456
1457 // case TGSI_TEXTURE_2D_MS_ARRAY:
1458 // num_coords = 3;
1459 // has_lod = 0;
1460 // break;
1461
1462 default:
1463 assert(0);
1464 return;
1465 }
1466
1467 if (!bld->sampler) {
1468 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
1469 for (i = 0; i < num_coords; i++)
1470 sizes_out[i] = bld->bld_base.base.undef;
1471 return;
1472 }
1473
1474 if (has_lod)
1475 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 2 );
1476 else
1477 explicit_lod = NULL;
1478
1479 bld->sampler->emit_size_query(bld->sampler,
1480 bld->bld_base.base.gallivm,
1481 bld->bld_base.int_bld.type,
1482 inst->Src[1].Register.Index,
1483 explicit_lod,
1484 sizes_out);
1485 }
1486
1487 static boolean
1488 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1489 int pc)
1490 {
1491 int i;
1492
1493 for (i = 0; i < 5; i++) {
1494 unsigned opcode;
1495
1496 if (pc + i >= bld->bld_base.info->num_instructions)
1497 return TRUE;
1498
1499 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
1500
1501 if (opcode == TGSI_OPCODE_END)
1502 return TRUE;
1503
1504 if (opcode == TGSI_OPCODE_TEX ||
1505 opcode == TGSI_OPCODE_TXP ||
1506 opcode == TGSI_OPCODE_TXD ||
1507 opcode == TGSI_OPCODE_TXB ||
1508 opcode == TGSI_OPCODE_TXL ||
1509 opcode == TGSI_OPCODE_TXF ||
1510 opcode == TGSI_OPCODE_TXQ ||
1511 opcode == TGSI_OPCODE_CAL ||
1512 opcode == TGSI_OPCODE_CALLNZ ||
1513 opcode == TGSI_OPCODE_IF ||
1514 opcode == TGSI_OPCODE_IFC ||
1515 opcode == TGSI_OPCODE_BGNLOOP ||
1516 opcode == TGSI_OPCODE_SWITCH)
1517 return FALSE;
1518 }
1519
1520 return TRUE;
1521 }
1522
1523
1524
1525 /**
1526 * Kill fragment if any of the src register values are negative.
1527 */
1528 static void
1529 emit_kil(
1530 struct lp_build_tgsi_soa_context *bld,
1531 const struct tgsi_full_instruction *inst,
1532 int pc)
1533 {
1534 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1535 const struct tgsi_full_src_register *reg = &inst->Src[0];
1536 LLVMValueRef terms[TGSI_NUM_CHANNELS];
1537 LLVMValueRef mask;
1538 unsigned chan_index;
1539
1540 memset(&terms, 0, sizeof terms);
1541
1542 TGSI_FOR_EACH_CHANNEL( chan_index ) {
1543 unsigned swizzle;
1544
1545 /* Unswizzle channel */
1546 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1547
1548 /* Check if the component has not been already tested. */
1549 assert(swizzle < TGSI_NUM_CHANNELS);
1550 if( !terms[swizzle] )
1551 /* TODO: change the comparison operator instead of setting the sign */
1552 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
1553 }
1554
1555 mask = NULL;
1556 TGSI_FOR_EACH_CHANNEL( chan_index ) {
1557 if(terms[chan_index]) {
1558 LLVMValueRef chan_mask;
1559
1560 /*
1561 * If term < 0 then mask = 0 else mask = ~0.
1562 */
1563 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
1564
1565 if(mask)
1566 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1567 else
1568 mask = chan_mask;
1569 }
1570 }
1571
1572 if(mask) {
1573 lp_build_mask_update(bld->mask, mask);
1574
1575 if (!near_end_of_shader(bld, pc))
1576 lp_build_mask_check(bld->mask);
1577 }
1578 }
1579
1580
1581 /**
1582 * Predicated fragment kill.
1583 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1584 * The only predication is the execution mask which will apply if
1585 * we're inside a loop or conditional.
1586 */
1587 static void
1588 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1589 int pc)
1590 {
1591 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1592 LLVMValueRef mask;
1593
1594 /* For those channels which are "alive", disable fragment shader
1595 * execution.
1596 */
1597 if (bld->exec_mask.has_mask) {
1598 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1599 }
1600 else {
1601 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
1602 mask = zero;
1603 }
1604
1605 lp_build_mask_update(bld->mask, mask);
1606
1607 if (!near_end_of_shader(bld, pc))
1608 lp_build_mask_check(bld->mask);
1609 }
1610
1611
1612 /**
1613 * Emit code which will dump the value of all the temporary registers
1614 * to stdout.
1615 */
1616 static void
1617 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1618 {
1619 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1620 LLVMBuilderRef builder = gallivm->builder;
1621 LLVMValueRef temp_ptr;
1622 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1623 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1624 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1625 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1626 int index;
1627 int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
1628
1629 for (index = 0; index < n; index++) {
1630 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1631 LLVMValueRef v[4][4], res;
1632 int chan;
1633
1634 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1635
1636 for (chan = 0; chan < 4; chan++) {
1637 temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
1638 res = LLVMBuildLoad(builder, temp_ptr, "");
1639 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1640 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1641 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1642 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1643 }
1644
1645 lp_build_printf(gallivm, " X: %f %f %f %f\n",
1646 v[0][0], v[0][1], v[0][2], v[0][3]);
1647 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
1648 v[1][0], v[1][1], v[1][2], v[1][3]);
1649 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
1650 v[2][0], v[2][1], v[2][2], v[2][3]);
1651 lp_build_printf(gallivm, " W: %f %f %f %f\n",
1652 v[3][0], v[3][1], v[3][2], v[3][3]);
1653 }
1654 }
1655
1656
1657
1658 void
1659 lp_emit_declaration_soa(
1660 struct lp_build_tgsi_context *bld_base,
1661 const struct tgsi_full_declaration *decl)
1662 {
1663 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1664 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1665 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1666 const unsigned first = decl->Range.First;
1667 const unsigned last = decl->Range.Last;
1668 unsigned idx, i;
1669
1670 for (idx = first; idx <= last; ++idx) {
1671 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
1672 switch (decl->Declaration.File) {
1673 case TGSI_FILE_TEMPORARY:
1674 assert(idx < LP_MAX_TGSI_TEMPS);
1675 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1676 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1677 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1678 }
1679 break;
1680
1681 case TGSI_FILE_OUTPUT:
1682 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1683 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1684 bld->outputs[idx][i] = lp_build_alloca(gallivm,
1685 vec_type, "output");
1686 }
1687 break;
1688
1689 case TGSI_FILE_ADDRESS:
1690 /* ADDR registers are the only allocated with an integer LLVM IR type,
1691 * as they are guaranteed to always have integers.
1692 * XXX: Not sure if this exception is worthwhile (or the whole idea of
1693 * an ADDR register for that matter).
1694 */
1695 assert(idx < LP_MAX_TGSI_ADDRS);
1696 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1697 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
1698 break;
1699
1700 case TGSI_FILE_PREDICATE:
1701 assert(idx < LP_MAX_TGSI_PREDS);
1702 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1703 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1704 "predicate");
1705 break;
1706
1707 default:
1708 /* don't need to declare other vars */
1709 break;
1710 }
1711 }
1712 }
1713
1714
1715 void lp_emit_immediate_soa(
1716 struct lp_build_tgsi_context *bld_base,
1717 const struct tgsi_full_immediate *imm)
1718 {
1719 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1720 struct gallivm_state * gallivm = bld_base->base.gallivm;
1721
1722 /* simply copy the immediate values into the next immediates[] slot */
1723 unsigned i;
1724 const uint size = imm->Immediate.NrTokens - 1;
1725 assert(size <= 4);
1726 assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
1727 switch (imm->Immediate.DataType) {
1728 case TGSI_IMM_FLOAT32:
1729 for( i = 0; i < size; ++i )
1730 bld->immediates[bld->num_immediates][i] =
1731 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
1732
1733 break;
1734 case TGSI_IMM_UINT32:
1735 for( i = 0; i < size; ++i ) {
1736 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
1737 bld->immediates[bld->num_immediates][i] =
1738 LLVMConstBitCast(tmp, bld_base->base.vec_type);
1739 }
1740
1741 break;
1742 case TGSI_IMM_INT32:
1743 for( i = 0; i < size; ++i ) {
1744 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
1745 bld->immediates[bld->num_immediates][i] =
1746 LLVMConstBitCast(tmp, bld_base->base.vec_type);
1747 }
1748
1749 break;
1750 }
1751 for( i = size; i < 4; ++i )
1752 bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
1753
1754 bld->num_immediates++;
1755 }
1756
1757 static void
1758 ddx_emit(
1759 const struct lp_build_tgsi_action * action,
1760 struct lp_build_tgsi_context * bld_base,
1761 struct lp_build_emit_data * emit_data)
1762 {
1763 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1764
1765 emit_fetch_deriv(bld, emit_data->args[0], NULL,
1766 &emit_data->output[emit_data->chan], NULL);
1767 }
1768
1769 static void
1770 ddy_emit(
1771 const struct lp_build_tgsi_action * action,
1772 struct lp_build_tgsi_context * bld_base,
1773 struct lp_build_emit_data * emit_data)
1774 {
1775 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1776
1777 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
1778 &emit_data->output[emit_data->chan]);
1779 }
1780
1781 static void
1782 kilp_emit(
1783 const struct lp_build_tgsi_action * action,
1784 struct lp_build_tgsi_context * bld_base,
1785 struct lp_build_emit_data * emit_data)
1786 {
1787 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1788
1789 emit_kilp(bld, bld_base->pc - 1);
1790 }
1791
1792 static void
1793 kil_emit(
1794 const struct lp_build_tgsi_action * action,
1795 struct lp_build_tgsi_context * bld_base,
1796 struct lp_build_emit_data * emit_data)
1797 {
1798 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1799
1800 emit_kil(bld, emit_data->inst, bld_base->pc - 1);
1801 }
1802
1803 static void
1804 tex_emit(
1805 const struct lp_build_tgsi_action * action,
1806 struct lp_build_tgsi_context * bld_base,
1807 struct lp_build_emit_data * emit_data)
1808 {
1809 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1810
1811 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
1812 }
1813
1814 static void
1815 txb_emit(
1816 const struct lp_build_tgsi_action * action,
1817 struct lp_build_tgsi_context * bld_base,
1818 struct lp_build_emit_data * emit_data)
1819 {
1820 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1821
1822 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
1823 emit_data->output);
1824 }
1825
1826 static void
1827 txd_emit(
1828 const struct lp_build_tgsi_action * action,
1829 struct lp_build_tgsi_context * bld_base,
1830 struct lp_build_emit_data * emit_data)
1831 {
1832 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1833
1834 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
1835 emit_data->output);
1836 }
1837
1838 static void
1839 txl_emit(
1840 const struct lp_build_tgsi_action * action,
1841 struct lp_build_tgsi_context * bld_base,
1842 struct lp_build_emit_data * emit_data)
1843 {
1844 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1845
1846 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
1847 emit_data->output);
1848 }
1849
1850 static void
1851 txp_emit(
1852 const struct lp_build_tgsi_action * action,
1853 struct lp_build_tgsi_context * bld_base,
1854 struct lp_build_emit_data * emit_data)
1855 {
1856 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1857
1858 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
1859 emit_data->output);
1860 }
1861
1862 static void
1863 txq_emit(
1864 const struct lp_build_tgsi_action * action,
1865 struct lp_build_tgsi_context * bld_base,
1866 struct lp_build_emit_data * emit_data)
1867 {
1868 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1869
1870 emit_txq(bld, emit_data->inst, emit_data->output);
1871 }
1872
1873 static void
1874 txf_emit(
1875 const struct lp_build_tgsi_action * action,
1876 struct lp_build_tgsi_context * bld_base,
1877 struct lp_build_emit_data * emit_data)
1878 {
1879 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1880
1881 emit_txf(bld, emit_data->inst, emit_data->output);
1882 }
1883
1884 static void
1885 cal_emit(
1886 const struct lp_build_tgsi_action * action,
1887 struct lp_build_tgsi_context * bld_base,
1888 struct lp_build_emit_data * emit_data)
1889 {
1890 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1891
1892 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
1893 &bld_base->pc);
1894 }
1895
1896 static void
1897 ret_emit(
1898 const struct lp_build_tgsi_action * action,
1899 struct lp_build_tgsi_context * bld_base,
1900 struct lp_build_emit_data * emit_data)
1901 {
1902 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1903
1904 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
1905 }
1906
1907 static void
1908 brk_emit(
1909 const struct lp_build_tgsi_action * action,
1910 struct lp_build_tgsi_context * bld_base,
1911 struct lp_build_emit_data * emit_data)
1912 {
1913 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1914
1915 lp_exec_break(&bld->exec_mask);
1916 }
1917
1918 static void
1919 if_emit(
1920 const struct lp_build_tgsi_action * action,
1921 struct lp_build_tgsi_context * bld_base,
1922 struct lp_build_emit_data * emit_data)
1923 {
1924 LLVMValueRef tmp;
1925 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1926
1927 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
1928 emit_data->args[0], bld->bld_base.base.zero);
1929 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
1930 }
1931
1932 static void
1933 bgnloop_emit(
1934 const struct lp_build_tgsi_action * action,
1935 struct lp_build_tgsi_context * bld_base,
1936 struct lp_build_emit_data * emit_data)
1937 {
1938 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1939
1940 lp_exec_bgnloop(&bld->exec_mask);
1941 }
1942
1943 static void
1944 bgnsub_emit(
1945 const struct lp_build_tgsi_action * action,
1946 struct lp_build_tgsi_context * bld_base,
1947 struct lp_build_emit_data * emit_data)
1948 {
1949 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1950
1951 lp_exec_mask_bgnsub(&bld->exec_mask);
1952 }
1953
1954 static void
1955 else_emit(
1956 const struct lp_build_tgsi_action * action,
1957 struct lp_build_tgsi_context * bld_base,
1958 struct lp_build_emit_data * emit_data)
1959 {
1960 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1961
1962 lp_exec_mask_cond_invert(&bld->exec_mask);
1963 }
1964
1965 static void
1966 endif_emit(
1967 const struct lp_build_tgsi_action * action,
1968 struct lp_build_tgsi_context * bld_base,
1969 struct lp_build_emit_data * emit_data)
1970 {
1971 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1972
1973 lp_exec_mask_cond_pop(&bld->exec_mask);
1974 }
1975
1976 static void
1977 endloop_emit(
1978 const struct lp_build_tgsi_action * action,
1979 struct lp_build_tgsi_context * bld_base,
1980 struct lp_build_emit_data * emit_data)
1981 {
1982 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1983
1984 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1985 }
1986
1987 static void
1988 endsub_emit(
1989 const struct lp_build_tgsi_action * action,
1990 struct lp_build_tgsi_context * bld_base,
1991 struct lp_build_emit_data * emit_data)
1992 {
1993 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1994
1995 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
1996 }
1997
1998 static void
1999 cont_emit(
2000 const struct lp_build_tgsi_action * action,
2001 struct lp_build_tgsi_context * bld_base,
2002 struct lp_build_emit_data * emit_data)
2003 {
2004 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2005
2006 lp_exec_continue(&bld->exec_mask);
2007 }
2008
2009 /* XXX: Refactor and move it to lp_bld_tgsi_action.c
2010 *
2011 * XXX: What do the comments about xmm registers mean? Maybe they are left over
2012 * from old code, but there is no garauntee that LLVM will use those registers
2013 * for this code.
2014 *
2015 * XXX: There should be no calls to lp_build_emit_fetch in this function. This
2016 * should be handled by the emit_data->fetch_args function. */
2017 static void
2018 nrm_emit(
2019 const struct lp_build_tgsi_action * action,
2020 struct lp_build_tgsi_context * bld_base,
2021 struct lp_build_emit_data * emit_data)
2022 {
2023 LLVMValueRef tmp0, tmp1;
2024 LLVMValueRef tmp4 = NULL;
2025 LLVMValueRef tmp5 = NULL;
2026 LLVMValueRef tmp6 = NULL;
2027 LLVMValueRef tmp7 = NULL;
2028 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2029
2030 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2031
2032 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
2033 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
2034 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
2035 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
2036
2037 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2038
2039 /* xmm4 = src.x */
2040 /* xmm0 = src.x * src.x */
2041 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
2042 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2043 tmp4 = tmp0;
2044 }
2045 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
2046
2047 /* xmm5 = src.y */
2048 /* xmm0 = xmm0 + src.y * src.y */
2049 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
2050 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2051 tmp5 = tmp1;
2052 }
2053 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2054 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2055
2056 /* xmm6 = src.z */
2057 /* xmm0 = xmm0 + src.z * src.z */
2058 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
2059 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2060 tmp6 = tmp1;
2061 }
2062 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2063 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2064
2065 if (dims == 4) {
2066 /* xmm7 = src.w */
2067 /* xmm0 = xmm0 + src.w * src.w */
2068 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
2069 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
2070 tmp7 = tmp1;
2071 }
2072 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
2073 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
2074 }
2075 /* xmm1 = 1 / sqrt(xmm0) */
2076 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
2077 /* dst.x = xmm1 * src.x */
2078 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
2079 emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
2080 }
2081 /* dst.y = xmm1 * src.y */
2082 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
2083 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
2084 }
2085
2086 /* dst.z = xmm1 * src.z */
2087 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
2088 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
2089 }
2090 /* dst.w = xmm1 * src.w */
2091 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
2092 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
2093 }
2094 }
2095
2096 /* dst.w = 1.0 */
2097 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
2098 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
2099 }
2100 }
2101
2102 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
2103 {
2104 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2105 struct gallivm_state * gallivm = bld_base->base.gallivm;
2106
2107 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2108 LLVMValueRef array_size =
2109 lp_build_const_int32(gallivm,
2110 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
2111 bld->temps_array = lp_build_array_alloca(gallivm,
2112 bld_base->base.vec_type, array_size,
2113 "temp_array");
2114 }
2115
2116 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2117 LLVMValueRef array_size =
2118 lp_build_const_int32(gallivm,
2119 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
2120 bld->outputs_array = lp_build_array_alloca(gallivm,
2121 bld_base->base.vec_type, array_size,
2122 "output_array");
2123 }
2124
2125 /* If we have indirect addressing in inputs we need to copy them into
2126 * our alloca array to be able to iterate over them */
2127 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
2128 unsigned index, chan;
2129 LLVMTypeRef vec_type = bld_base->base.vec_type;
2130 LLVMValueRef array_size = lp_build_const_int32(gallivm,
2131 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
2132 bld->inputs_array = lp_build_array_alloca(gallivm,
2133 vec_type, array_size,
2134 "input_array");
2135
2136 assert(bld_base->info->num_inputs
2137 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
2138
2139 for (index = 0; index < bld_base->info->num_inputs; ++index) {
2140 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2141 LLVMValueRef lindex =
2142 lp_build_const_int32(gallivm, index * 4 + chan);
2143 LLVMValueRef input_ptr =
2144 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
2145 &lindex, 1, "");
2146 LLVMValueRef value = bld->inputs[index][chan];
2147 if (value)
2148 LLVMBuildStore(gallivm->builder, value, input_ptr);
2149 }
2150 }
2151 }
2152 }
2153
2154 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
2155 {
2156 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2157
2158 if (0) {
2159 /* for debugging */
2160 emit_dump_temps(bld);
2161 }
2162
2163 /* If we have indirect addressing in outputs we need to copy our alloca array
2164 * to the outputs slots specified by the called */
2165 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2166 unsigned index, chan;
2167 assert(bld_base->info->num_outputs <=
2168 bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1);
2169 for (index = 0; index < bld_base->info->num_outputs; ++index) {
2170 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2171 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
2172 }
2173 }
2174 }
2175 }
2176
2177 void
2178 lp_build_tgsi_soa(struct gallivm_state *gallivm,
2179 const struct tgsi_token *tokens,
2180 struct lp_type type,
2181 struct lp_build_mask_context *mask,
2182 LLVMValueRef consts_ptr,
2183 const struct lp_bld_tgsi_system_values *system_values,
2184 const LLVMValueRef *pos,
2185 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
2186 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
2187 struct lp_build_sampler_soa *sampler,
2188 const struct tgsi_shader_info *info)
2189 {
2190 struct lp_build_tgsi_soa_context bld;
2191
2192 struct lp_type res_type;
2193
2194 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2195 memset(&res_type, 0, sizeof res_type);
2196 res_type.width = type.width;
2197 res_type.length = type.length;
2198 res_type.sign = 1;
2199
2200 /* Setup build context */
2201 memset(&bld, 0, sizeof bld);
2202 lp_build_context_init(&bld.bld_base.base, gallivm, type);
2203 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2204 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2205 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2206 bld.mask = mask;
2207 bld.pos = pos;
2208 bld.inputs = inputs;
2209 bld.outputs = outputs;
2210 bld.consts_ptr = consts_ptr;
2211 bld.sampler = sampler;
2212 bld.bld_base.info = info;
2213 bld.indirect_files = info->indirect_files;
2214
2215 bld.bld_base.soa = TRUE;
2216 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
2217 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
2218 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
2219 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
2220 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
2221 bld.bld_base.emit_store = emit_store;
2222
2223 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
2224 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
2225
2226 bld.bld_base.emit_prologue = emit_prologue;
2227 bld.bld_base.emit_epilogue = emit_epilogue;
2228
2229 /* Set opcode actions */
2230 lp_set_default_actions_cpu(&bld.bld_base);
2231
2232 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
2233 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
2234 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
2235 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
2236 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
2237 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
2238 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
2239 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
2240 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
2241 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
2242 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
2243 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
2244 bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
2245 bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
2246 bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
2247 bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
2248 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
2249 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
2250 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
2251 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
2252 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
2253 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
2254 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
2255 bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
2256
2257 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
2258
2259 bld.system_values = *system_values;
2260
2261 lp_build_tgsi_llvm(&bld.bld_base, tokens);
2262
2263 if (0) {
2264 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2265 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2266 debug_printf("11111111111111111111111111111 \n");
2267 tgsi_dump(tokens, 0);
2268 lp_debug_dump_value(function);
2269 debug_printf("2222222222222222222222222222 \n");
2270 }
2271
2272 if (0) {
2273 LLVMModuleRef module = LLVMGetGlobalParent(
2274 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
2275 LLVMDumpModule(module);
2276
2277 }
2278 }