gallivm: enable fetch for integer opcodes. (v2)
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_tgsi_action.h"
51 #include "lp_bld_type.h"
52 #include "lp_bld_const.h"
53 #include "lp_bld_arit.h"
54 #include "lp_bld_bitarit.h"
55 #include "lp_bld_gather.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_logic.h"
58 #include "lp_bld_swizzle.h"
59 #include "lp_bld_flow.h"
60 #include "lp_bld_quad.h"
61 #include "lp_bld_tgsi.h"
62 #include "lp_bld_limits.h"
63 #include "lp_bld_debug.h"
64 #include "lp_bld_printf.h"
65
66
67 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
68 {
69 mask->bld = bld;
70 mask->has_mask = FALSE;
71 mask->cond_stack_size = 0;
72 mask->loop_stack_size = 0;
73 mask->call_stack_size = 0;
74
75 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
76 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
77 LLVMConstAllOnes(mask->int_vec_type);
78 }
79
80 static void lp_exec_mask_update(struct lp_exec_mask *mask)
81 {
82 LLVMBuilderRef builder = mask->bld->gallivm->builder;
83
84 if (mask->loop_stack_size) {
85 /*for loops we need to update the entire mask at runtime */
86 LLVMValueRef tmp;
87 assert(mask->break_mask);
88 tmp = LLVMBuildAnd(builder,
89 mask->cont_mask,
90 mask->break_mask,
91 "maskcb");
92 mask->exec_mask = LLVMBuildAnd(builder,
93 mask->cond_mask,
94 tmp,
95 "maskfull");
96 } else
97 mask->exec_mask = mask->cond_mask;
98
99 if (mask->call_stack_size) {
100 mask->exec_mask = LLVMBuildAnd(builder,
101 mask->exec_mask,
102 mask->ret_mask,
103 "callmask");
104 }
105
106 mask->has_mask = (mask->cond_stack_size > 0 ||
107 mask->loop_stack_size > 0 ||
108 mask->call_stack_size > 0);
109 }
110
111 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
112 LLVMValueRef val)
113 {
114 LLVMBuilderRef builder = mask->bld->gallivm->builder;
115
116 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
117 if (mask->cond_stack_size == 0) {
118 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
119 }
120 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
121 assert(LLVMTypeOf(val) == mask->int_vec_type);
122 mask->cond_mask = LLVMBuildAnd(builder,
123 mask->cond_mask,
124 val,
125 "");
126 lp_exec_mask_update(mask);
127 }
128
129 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
130 {
131 LLVMBuilderRef builder = mask->bld->gallivm->builder;
132 LLVMValueRef prev_mask;
133 LLVMValueRef inv_mask;
134
135 assert(mask->cond_stack_size);
136 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
137 if (mask->cond_stack_size == 1) {
138 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
139 }
140
141 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
142
143 mask->cond_mask = LLVMBuildAnd(builder,
144 inv_mask,
145 prev_mask, "");
146 lp_exec_mask_update(mask);
147 }
148
149 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
150 {
151 assert(mask->cond_stack_size);
152 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
153 lp_exec_mask_update(mask);
154 }
155
156 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
157 {
158 LLVMBuilderRef builder = mask->bld->gallivm->builder;
159
160 if (mask->loop_stack_size == 0) {
161 assert(mask->loop_block == NULL);
162 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
163 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
164 assert(mask->break_var == NULL);
165 }
166
167 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
168
169 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
170 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
171 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
172 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
173 ++mask->loop_stack_size;
174
175 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
176 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
177
178 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
179 LLVMBuildBr(builder, mask->loop_block);
180 LLVMPositionBuilderAtEnd(builder, mask->loop_block);
181
182 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
183
184 lp_exec_mask_update(mask);
185 }
186
187 static void lp_exec_break(struct lp_exec_mask *mask)
188 {
189 LLVMBuilderRef builder = mask->bld->gallivm->builder;
190 LLVMValueRef exec_mask = LLVMBuildNot(builder,
191 mask->exec_mask,
192 "break");
193
194 mask->break_mask = LLVMBuildAnd(builder,
195 mask->break_mask,
196 exec_mask, "break_full");
197
198 lp_exec_mask_update(mask);
199 }
200
201 static void lp_exec_continue(struct lp_exec_mask *mask)
202 {
203 LLVMBuilderRef builder = mask->bld->gallivm->builder;
204 LLVMValueRef exec_mask = LLVMBuildNot(builder,
205 mask->exec_mask,
206 "");
207
208 mask->cont_mask = LLVMBuildAnd(builder,
209 mask->cont_mask,
210 exec_mask, "");
211
212 lp_exec_mask_update(mask);
213 }
214
215
216 static void lp_exec_endloop(struct gallivm_state *gallivm,
217 struct lp_exec_mask *mask)
218 {
219 LLVMBuilderRef builder = mask->bld->gallivm->builder;
220 LLVMBasicBlockRef endloop;
221 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
222 mask->bld->type.width *
223 mask->bld->type.length);
224 LLVMValueRef i1cond;
225
226 assert(mask->break_mask);
227
228 /*
229 * Restore the cont_mask, but don't pop
230 */
231 assert(mask->loop_stack_size);
232 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
233 lp_exec_mask_update(mask);
234
235 /*
236 * Unlike the continue mask, the break_mask must be preserved across loop
237 * iterations
238 */
239 LLVMBuildStore(builder, mask->break_mask, mask->break_var);
240
241 /* i1cond = (mask == 0) */
242 i1cond = LLVMBuildICmp(
243 builder,
244 LLVMIntNE,
245 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
246 LLVMConstNull(reg_type), "");
247
248 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
249
250 LLVMBuildCondBr(builder,
251 i1cond, mask->loop_block, endloop);
252
253 LLVMPositionBuilderAtEnd(builder, endloop);
254
255 assert(mask->loop_stack_size);
256 --mask->loop_stack_size;
257 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
258 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
259 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
260 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
261
262 lp_exec_mask_update(mask);
263 }
264
265 /* stores val into an address pointed to by dst.
266 * mask->exec_mask is used to figure out which bits of val
267 * should be stored into the address
268 * (0 means don't store this bit, 1 means do store).
269 */
270 static void lp_exec_mask_store(struct lp_exec_mask *mask,
271 struct lp_build_context *bld_store,
272 LLVMValueRef pred,
273 LLVMValueRef val,
274 LLVMValueRef dst)
275 {
276 LLVMBuilderRef builder = mask->bld->gallivm->builder;
277
278 /* Mix the predicate and execution mask */
279 if (mask->has_mask) {
280 if (pred) {
281 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
282 } else {
283 pred = mask->exec_mask;
284 }
285 }
286
287 if (pred) {
288 LLVMValueRef real_val, dst_val;
289
290 dst_val = LLVMBuildLoad(builder, dst, "");
291 real_val = lp_build_select(bld_store,
292 pred,
293 val, dst_val);
294
295 LLVMBuildStore(builder, real_val, dst);
296 } else
297 LLVMBuildStore(builder, val, dst);
298 }
299
300 static void lp_exec_mask_call(struct lp_exec_mask *mask,
301 int func,
302 int *pc)
303 {
304 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
305 mask->call_stack[mask->call_stack_size].pc = *pc;
306 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
307 mask->call_stack_size++;
308 *pc = func;
309 }
310
311 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
312 {
313 LLVMBuilderRef builder = mask->bld->gallivm->builder;
314 LLVMValueRef exec_mask;
315
316 if (mask->call_stack_size == 0) {
317 /* returning from main() */
318 *pc = -1;
319 return;
320 }
321 exec_mask = LLVMBuildNot(builder,
322 mask->exec_mask,
323 "ret");
324
325 mask->ret_mask = LLVMBuildAnd(builder,
326 mask->ret_mask,
327 exec_mask, "ret_full");
328
329 lp_exec_mask_update(mask);
330 }
331
/**
 * Hook for the start of a subroutine body. Intentionally empty: no mask
 * state is changed here.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
335
336 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
337 {
338 assert(mask->call_stack_size);
339 mask->call_stack_size--;
340 *pc = mask->call_stack[mask->call_stack_size].pc;
341 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
342 lp_exec_mask_update(mask);
343 }
344
345
346 /**
347 * Return pointer to a temporary register channel (src or dest).
348 * Note that indirect addressing cannot be handled here.
349 * \param index which temporary register
350 * \param chan which channel of the temp register.
351 */
352 LLVMValueRef
353 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
354 unsigned index,
355 unsigned chan)
356 {
357 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
358 assert(chan < 4);
359 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
360 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
361 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
362 }
363 else {
364 return bld->temps[index][chan];
365 }
366 }
367
368 /**
369 * Return pointer to a output register channel (src or dest).
370 * Note that indirect addressing cannot be handled here.
371 * \param index which output register
372 * \param chan which channel of the output register.
373 */
374 LLVMValueRef
375 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
376 unsigned index,
377 unsigned chan)
378 {
379 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
380 assert(chan < 4);
381 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
382 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
383 index * 4 + chan);
384 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
385 }
386 else {
387 return bld->outputs[index][chan];
388 }
389 }
390
391 /**
392 * Gather vector.
393 * XXX the lp_build_gather() function should be capable of doing this
394 * with a little work.
395 */
396 static LLVMValueRef
397 build_gather(struct lp_build_context *bld,
398 LLVMValueRef base_ptr,
399 LLVMValueRef indexes)
400 {
401 LLVMBuilderRef builder = bld->gallivm->builder;
402 LLVMValueRef res = bld->undef;
403 unsigned i;
404
405 /*
406 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
407 */
408 for (i = 0; i < bld->type.length; i++) {
409 LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
410 LLVMValueRef index = LLVMBuildExtractElement(builder,
411 indexes, ii, "");
412 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
413 &index, 1, "gather_ptr");
414 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
415
416 res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
417 }
418
419 return res;
420 }
421
422
423 /**
424 * Scatter/store vector.
425 */
426 static void
427 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
428 LLVMValueRef base_ptr,
429 LLVMValueRef indexes,
430 LLVMValueRef values,
431 struct lp_exec_mask *mask,
432 LLVMValueRef pred)
433 {
434 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
435 LLVMBuilderRef builder = gallivm->builder;
436 unsigned i;
437
438 /* Mix the predicate and execution mask */
439 if (mask->has_mask) {
440 if (pred) {
441 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
442 }
443 else {
444 pred = mask->exec_mask;
445 }
446 }
447
448 /*
449 * Loop over elements of index_vec, store scalar value.
450 */
451 for (i = 0; i < bld->bld_base.base.type.length; i++) {
452 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
453 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
454 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
455 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
456 LLVMValueRef scalar_pred = pred ?
457 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
458
459 if (0)
460 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
461 ii, val, index, scalar_ptr);
462
463 if (scalar_pred) {
464 LLVMValueRef real_val, dst_val;
465 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
466 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
467 LLVMBuildStore(builder, real_val, scalar_ptr);
468 }
469 else {
470 LLVMBuildStore(builder, val, scalar_ptr);
471 }
472 }
473 }
474
475
476 /**
477 * Read the current value of the ADDR register, convert the floats to
478 * ints, add the base index and return the vector of offsets.
479 * The offsets will be used to index into the constant buffer or
480 * temporary register file.
481 */
482 static LLVMValueRef
483 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
484 unsigned reg_file, unsigned reg_index,
485 const struct tgsi_src_register *indirect_reg)
486 {
487 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
488 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
489 /* always use X component of address register */
490 unsigned swizzle = indirect_reg->SwizzleX;
491 LLVMValueRef base;
492 LLVMValueRef rel;
493 LLVMValueRef max_index;
494 LLVMValueRef index;
495
496 assert(bld->indirect_files & (1 << reg_file));
497
498 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
499
500 assert(swizzle < 4);
501 rel = LLVMBuildLoad(builder,
502 bld->addr[indirect_reg->Index][swizzle],
503 "load addr reg");
504
505 /* for indexing we want integers */
506 rel = LLVMBuildFPToSI(builder,
507 rel,
508 uint_bld->vec_type, "");
509
510 index = lp_build_add(uint_bld, base, rel);
511
512 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
513 uint_bld->type,
514 bld->bld_base.info->file_max[reg_file]);
515
516 assert(!uint_bld->type.sign);
517 index = lp_build_min(uint_bld, index, max_index);
518
519 return index;
520 }
521
522 static struct lp_build_context *
523 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
524 enum tgsi_opcode_type stype)
525 {
526 struct lp_build_context *bld_fetch;
527
528 switch (stype) {
529 case TGSI_TYPE_FLOAT:
530 case TGSI_TYPE_UNTYPED:
531 bld_fetch = &bld_base->base;
532 break;
533 case TGSI_TYPE_UNSIGNED:
534 bld_fetch = &bld_base->uint_bld;
535 break;
536 case TGSI_TYPE_SIGNED:
537 bld_fetch = &bld_base->int_bld;
538 break;
539 case TGSI_TYPE_VOID:
540 case TGSI_TYPE_DOUBLE:
541 default:
542 assert(0);
543 bld_fetch = NULL;
544 break;
545 }
546 return bld_fetch;
547 }
548
549 static LLVMValueRef
550 emit_fetch_constant(
551 struct lp_build_tgsi_context * bld_base,
552 const struct tgsi_full_src_register * reg,
553 enum tgsi_opcode_type stype,
554 unsigned swizzle)
555 {
556 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
557 struct gallivm_state *gallivm = bld_base->base.gallivm;
558 LLVMBuilderRef builder = gallivm->builder;
559 struct lp_build_context *uint_bld = &bld_base->uint_bld;
560 LLVMValueRef indirect_index = NULL;
561 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
562
563 /* XXX: Handle fetching xyzw components as a vector */
564 assert(swizzle != ~0);
565
566 if (reg->Register.Indirect) {
567 indirect_index = get_indirect_index(bld,
568 reg->Register.File,
569 reg->Register.Index,
570 &reg->Indirect);
571 }
572
573 if (reg->Register.Indirect) {
574 LLVMValueRef swizzle_vec =
575 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
576 LLVMValueRef index_vec; /* index into the const buffer */
577
578 /* index_vec = indirect_index * 4 + swizzle */
579 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
580 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
581
582 /* Gather values from the constant buffer */
583 return build_gather(bld_fetch, bld->consts_ptr, index_vec);
584 }
585 else {
586 LLVMValueRef index; /* index into the const buffer */
587 LLVMValueRef scalar, scalar_ptr;
588
589 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
590
591 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
592 &index, 1, "");
593
594 if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
595 LLVMTypeRef ivtype = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0);
596 LLVMValueRef temp_ptr;
597 temp_ptr = LLVMBuildBitCast(builder, scalar_ptr, ivtype, "");
598 scalar = LLVMBuildLoad(builder, temp_ptr, "");
599 } else
600 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
601
602 return lp_build_broadcast_scalar(bld_fetch, scalar);
603 }
604 }
605
606 static LLVMValueRef
607 emit_fetch_immediate(
608 struct lp_build_tgsi_context * bld_base,
609 const struct tgsi_full_src_register * reg,
610 enum tgsi_opcode_type stype,
611 unsigned swizzle)
612 {
613 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
614 LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
615 assert(res);
616
617 if (stype == TGSI_TYPE_UNSIGNED) {
618 res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
619 } else if (stype == TGSI_TYPE_SIGNED) {
620 res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
621 }
622 return res;
623 }
624
625 static LLVMValueRef
626 emit_fetch_input(
627 struct lp_build_tgsi_context * bld_base,
628 const struct tgsi_full_src_register * reg,
629 enum tgsi_opcode_type stype,
630 unsigned swizzle)
631 {
632 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
633 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
634 LLVMBuilderRef builder = gallivm->builder;
635 struct lp_build_context *uint_bld = &bld_base->uint_bld;
636 LLVMValueRef indirect_index = NULL;
637 LLVMValueRef res;
638
639 if (reg->Register.Indirect) {
640 indirect_index = get_indirect_index(bld,
641 reg->Register.File,
642 reg->Register.Index,
643 &reg->Indirect);
644 }
645
646 if (reg->Register.Indirect) {
647 LLVMValueRef swizzle_vec =
648 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
649 LLVMValueRef length_vec =
650 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
651 LLVMValueRef index_vec; /* index into the const buffer */
652 LLVMValueRef inputs_array;
653 LLVMTypeRef float4_ptr_type;
654
655 /* index_vec = (indirect_index * 4 + swizzle) * length */
656 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
657 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
658 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
659
660 /* cast inputs_array pointer to float* */
661 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
662 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
663 float4_ptr_type, "");
664
665 /* Gather values from the temporary register array */
666 res = build_gather(&bld_base->base, inputs_array, index_vec);
667 } else {
668 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
669 LLVMValueRef lindex = lp_build_const_int32(gallivm,
670 reg->Register.Index * 4 + swizzle);
671 LLVMValueRef input_ptr = LLVMBuildGEP(builder,
672 bld->inputs_array, &lindex, 1, "");
673 res = LLVMBuildLoad(builder, input_ptr, "");
674 }
675 else {
676 res = bld->inputs[reg->Register.Index][swizzle];
677 }
678 }
679 assert(res);
680 return res;
681 }
682
683 static LLVMValueRef
684 emit_fetch_temporary(
685 struct lp_build_tgsi_context * bld_base,
686 const struct tgsi_full_src_register * reg,
687 enum tgsi_opcode_type stype,
688 unsigned swizzle)
689 {
690 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
691 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
692 LLVMBuilderRef builder = gallivm->builder;
693 struct lp_build_context *uint_bld = &bld_base->uint_bld;
694 LLVMValueRef indirect_index = NULL;
695 LLVMValueRef res;
696
697 if (reg->Register.Indirect) {
698 indirect_index = get_indirect_index(bld,
699 reg->Register.File,
700 reg->Register.Index,
701 &reg->Indirect);
702 }
703
704 if (reg->Register.Indirect) {
705 LLVMValueRef swizzle_vec =
706 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
707 LLVMValueRef length_vec =
708 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
709 bld->bld_base.base.type.length);
710 LLVMValueRef index_vec; /* index into the const buffer */
711 LLVMValueRef temps_array;
712 LLVMTypeRef float4_ptr_type;
713
714 /* index_vec = (indirect_index * 4 + swizzle) * length */
715 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
716 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
717 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
718
719 /* cast temps_array pointer to float* */
720 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
721 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
722 float4_ptr_type, "");
723
724 /* Gather values from the temporary register array */
725 res = build_gather(&bld_base->base, temps_array, index_vec);
726 }
727 else {
728 LLVMValueRef temp_ptr;
729 if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
730 LLVMTypeRef itype = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0);
731 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
732 swizzle);
733 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, itype, "");
734 } else
735 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
736 res = LLVMBuildLoad(builder, temp_ptr, "");
737 if (!res)
738 return bld->bld_base.base.undef;
739 }
740
741 return res;
742 }
743
744 static LLVMValueRef
745 emit_fetch_system_value(
746 struct lp_build_tgsi_context * bld_base,
747 const struct tgsi_full_src_register * reg,
748 enum tgsi_opcode_type stype,
749 unsigned swizzle)
750 {
751 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
752 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
753 LLVMBuilderRef builder = gallivm->builder;
754 LLVMValueRef index; /* index into the system value array */
755 LLVMValueRef scalar, scalar_ptr;
756
757 assert(!reg->Register.Indirect);
758
759 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
760
761 scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, &index, 1, "");
762 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
763
764 return lp_build_broadcast_scalar(&bld->bld_base.base, scalar);
765 }
766
767 /**
768 * Register fetch with derivatives.
769 */
770 static void
771 emit_fetch_deriv(
772 struct lp_build_tgsi_soa_context *bld,
773 LLVMValueRef src,
774 LLVMValueRef *res,
775 LLVMValueRef *ddx,
776 LLVMValueRef *ddy)
777 {
778 if(res)
779 *res = src;
780
781 /* TODO: use interpolation coeffs for inputs */
782
783 if(ddx)
784 *ddx = lp_build_ddx(&bld->bld_base.base, src);
785
786 if(ddy)
787 *ddy = lp_build_ddy(&bld->bld_base.base, src);
788 }
789
790
791 /**
792 * Predicate.
793 */
794 static void
795 emit_fetch_predicate(
796 struct lp_build_tgsi_soa_context *bld,
797 const struct tgsi_full_instruction *inst,
798 LLVMValueRef *pred)
799 {
800 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
801 unsigned index;
802 unsigned char swizzles[4];
803 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
804 LLVMValueRef value;
805 unsigned chan;
806
807 if (!inst->Instruction.Predicate) {
808 TGSI_FOR_EACH_CHANNEL( chan ) {
809 pred[chan] = NULL;
810 }
811 return;
812 }
813
814 swizzles[0] = inst->Predicate.SwizzleX;
815 swizzles[1] = inst->Predicate.SwizzleY;
816 swizzles[2] = inst->Predicate.SwizzleZ;
817 swizzles[3] = inst->Predicate.SwizzleW;
818
819 index = inst->Predicate.Index;
820 assert(index < LP_MAX_TGSI_PREDS);
821
822 TGSI_FOR_EACH_CHANNEL( chan ) {
823 unsigned swizzle = swizzles[chan];
824
825 /*
826 * Only fetch the predicate register channels that are actually listed
827 * in the swizzles
828 */
829 if (!unswizzled[swizzle]) {
830 value = LLVMBuildLoad(builder,
831 bld->preds[index][swizzle], "");
832
833 /*
834 * Convert the value to an integer mask.
835 *
836 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
837 * is needlessly causing two comparisons due to storing the intermediate
838 * result as float vector instead of an integer mask vector.
839 */
840 value = lp_build_compare(bld->bld_base.base.gallivm,
841 bld->bld_base.base.type,
842 PIPE_FUNC_NOTEQUAL,
843 value,
844 bld->bld_base.base.zero);
845 if (inst->Predicate.Negate) {
846 value = LLVMBuildNot(builder, value, "");
847 }
848
849 unswizzled[swizzle] = value;
850 } else {
851 value = unswizzled[swizzle];
852 }
853
854 pred[chan] = value;
855 }
856 }
857
858
859 /**
860 * Register store.
861 */
862 static void
863 emit_store_chan(
864 struct lp_build_tgsi_context *bld_base,
865 const struct tgsi_full_instruction *inst,
866 unsigned index,
867 unsigned chan_index,
868 LLVMValueRef pred,
869 LLVMValueRef value)
870 {
871 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
872 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
873 LLVMBuilderRef builder = gallivm->builder;
874 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
875 struct lp_build_context *uint_bld = &bld_base->uint_bld;
876 LLVMValueRef indirect_index = NULL;
877 struct lp_build_context *bld_store;
878
879 bld_store = &bld->bld_base.base;
880
881 switch( inst->Instruction.Saturate ) {
882 case TGSI_SAT_NONE:
883 break;
884
885 case TGSI_SAT_ZERO_ONE:
886 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
887 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
888 break;
889
890 case TGSI_SAT_MINUS_PLUS_ONE:
891 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
892 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
893 break;
894
895 default:
896 assert(0);
897 }
898
899 if (reg->Register.Indirect) {
900 indirect_index = get_indirect_index(bld,
901 reg->Register.File,
902 reg->Register.Index,
903 &reg->Indirect);
904 } else {
905 assert(reg->Register.Index <=
906 bld->bld_base.info->file_max[reg->Register.File]);
907 }
908
909 switch( reg->Register.File ) {
910 case TGSI_FILE_OUTPUT:
911 if (reg->Register.Indirect) {
912 LLVMValueRef chan_vec =
913 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
914 LLVMValueRef length_vec =
915 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
916 LLVMValueRef index_vec; /* indexes into the temp registers */
917 LLVMValueRef outputs_array;
918 LLVMValueRef pixel_offsets;
919 LLVMTypeRef float_ptr_type;
920 int i;
921
922 /* build pixel offset vector: {0, 1, 2, 3, ...} */
923 pixel_offsets = uint_bld->undef;
924 for (i = 0; i < bld->bld_base.base.type.length; i++) {
925 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
926 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
927 ii, ii, "");
928 }
929
930 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
931 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
932 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
933 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
934 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
935
936 float_ptr_type =
937 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
938 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
939 float_ptr_type, "");
940
941 /* Scatter store values into temp registers */
942 emit_mask_scatter(bld, outputs_array, index_vec, value,
943 &bld->exec_mask, pred);
944 }
945 else {
946 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
947 chan_index);
948 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
949 }
950 break;
951
952 case TGSI_FILE_TEMPORARY:
953 if (reg->Register.Indirect) {
954 LLVMValueRef chan_vec =
955 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
956 LLVMValueRef length_vec =
957 lp_build_const_int_vec(gallivm, uint_bld->type,
958 bld->bld_base.base.type.length);
959 LLVMValueRef index_vec; /* indexes into the temp registers */
960 LLVMValueRef temps_array;
961 LLVMValueRef pixel_offsets;
962 LLVMTypeRef float_ptr_type;
963 int i;
964
965 /* build pixel offset vector: {0, 1, 2, 3, ...} */
966 pixel_offsets = uint_bld->undef;
967 for (i = 0; i < bld->bld_base.base.type.length; i++) {
968 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
969 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
970 ii, ii, "");
971 }
972
973 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
974 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
975 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
976 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
977 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
978
979 float_ptr_type =
980 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
981 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
982 float_ptr_type, "");
983
984 /* Scatter store values into temp registers */
985 emit_mask_scatter(bld, temps_array, index_vec, value,
986 &bld->exec_mask, pred);
987 }
988 else {
989 LLVMValueRef temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
990 chan_index);
991 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr);
992 }
993 break;
994
995 case TGSI_FILE_ADDRESS:
996 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
997 bld->addr[reg->Register.Index][chan_index]);
998 break;
999
1000 case TGSI_FILE_PREDICATE:
1001 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1002 bld->preds[reg->Register.Index][chan_index]);
1003 break;
1004
1005 default:
1006 assert( 0 );
1007 }
1008 }
1009
1010 static void
1011 emit_store(
1012 struct lp_build_tgsi_context * bld_base,
1013 const struct tgsi_full_instruction * inst,
1014 const struct tgsi_opcode_info * info,
1015 LLVMValueRef dst[4])
1016
1017 {
1018 unsigned chan_index;
1019 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1020
1021 if(info->num_dst) {
1022 LLVMValueRef pred[TGSI_NUM_CHANNELS];
1023
1024 emit_fetch_predicate( bld, inst, pred );
1025
1026 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1027 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1028 }
1029 }
1030 }
1031
1032 /**
1033 * High-level instruction translators.
1034 */
1035
1036 static void
1037 emit_tex( struct lp_build_tgsi_soa_context *bld,
1038 const struct tgsi_full_instruction *inst,
1039 enum lp_build_tex_modifier modifier,
1040 LLVMValueRef *texel)
1041 {
1042 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1043 unsigned unit;
1044 LLVMValueRef lod_bias, explicit_lod;
1045 LLVMValueRef oow = NULL;
1046 LLVMValueRef coords[3];
1047 LLVMValueRef ddx[3];
1048 LLVMValueRef ddy[3];
1049 unsigned num_coords;
1050 unsigned i;
1051
1052 if (!bld->sampler) {
1053 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1054 for (i = 0; i < 4; i++) {
1055 texel[i] = bld->bld_base.base.undef;
1056 }
1057 return;
1058 }
1059
1060 switch (inst->Texture.Texture) {
1061 case TGSI_TEXTURE_1D:
1062 num_coords = 1;
1063 break;
1064 case TGSI_TEXTURE_1D_ARRAY:
1065 case TGSI_TEXTURE_2D:
1066 case TGSI_TEXTURE_RECT:
1067 num_coords = 2;
1068 break;
1069 case TGSI_TEXTURE_SHADOW1D:
1070 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1071 case TGSI_TEXTURE_SHADOW2D:
1072 case TGSI_TEXTURE_SHADOWRECT:
1073 case TGSI_TEXTURE_2D_ARRAY:
1074 case TGSI_TEXTURE_3D:
1075 case TGSI_TEXTURE_CUBE:
1076 num_coords = 3;
1077 break;
1078 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1079 num_coords = 4;
1080 break;
1081 default:
1082 assert(0);
1083 return;
1084 }
1085
1086 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1087 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1088 explicit_lod = NULL;
1089 }
1090 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1091 lod_bias = NULL;
1092 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1093 }
1094 else {
1095 lod_bias = NULL;
1096 explicit_lod = NULL;
1097 }
1098
1099 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1100 oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1101 oow = lp_build_rcp(&bld->bld_base.base, oow);
1102 }
1103
1104 for (i = 0; i < num_coords; i++) {
1105 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1106 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1107 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1108 }
1109 for (i = num_coords; i < 3; i++) {
1110 coords[i] = bld->bld_base.base.undef;
1111 }
1112
1113 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1114 LLVMValueRef index0 = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
1115 for (i = 0; i < num_coords; i++) {
1116 LLVMValueRef src1 = lp_build_emit_fetch( &bld->bld_base, inst, 1, i );
1117 LLVMValueRef src2 = lp_build_emit_fetch( &bld->bld_base, inst, 2, i );
1118 ddx[i] = LLVMBuildExtractElement(builder, src1, index0, "");
1119 ddy[i] = LLVMBuildExtractElement(builder, src2, index0, "");
1120 }
1121 unit = inst->Src[3].Register.Index;
1122 } else {
1123 for (i = 0; i < num_coords; i++) {
1124 ddx[i] = lp_build_scalar_ddx( &bld->bld_base.base, coords[i] );
1125 ddy[i] = lp_build_scalar_ddy( &bld->bld_base.base, coords[i] );
1126 }
1127 unit = inst->Src[1].Register.Index;
1128 }
1129 for (i = num_coords; i < 3; i++) {
1130 ddx[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
1131 ddy[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
1132 }
1133
1134 bld->sampler->emit_fetch_texel(bld->sampler,
1135 bld->bld_base.base.gallivm,
1136 bld->bld_base.base.type,
1137 unit, num_coords, coords,
1138 ddx, ddy,
1139 lod_bias, explicit_lod,
1140 texel);
1141 }
1142
1143 static boolean
1144 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1145 int pc)
1146 {
1147 int i;
1148
1149 for (i = 0; i < 5; i++) {
1150 unsigned opcode;
1151
1152 if (pc + i >= bld->bld_base.info->num_instructions)
1153 return TRUE;
1154
1155 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
1156
1157 if (opcode == TGSI_OPCODE_END)
1158 return TRUE;
1159
1160 if (opcode == TGSI_OPCODE_TEX ||
1161 opcode == TGSI_OPCODE_TXP ||
1162 opcode == TGSI_OPCODE_TXD ||
1163 opcode == TGSI_OPCODE_TXB ||
1164 opcode == TGSI_OPCODE_TXL ||
1165 opcode == TGSI_OPCODE_TXF ||
1166 opcode == TGSI_OPCODE_TXQ ||
1167 opcode == TGSI_OPCODE_CAL ||
1168 opcode == TGSI_OPCODE_CALLNZ ||
1169 opcode == TGSI_OPCODE_IF ||
1170 opcode == TGSI_OPCODE_IFC ||
1171 opcode == TGSI_OPCODE_BGNLOOP ||
1172 opcode == TGSI_OPCODE_SWITCH)
1173 return FALSE;
1174 }
1175
1176 return TRUE;
1177 }
1178
1179
1180
1181 /**
1182 * Kill fragment if any of the src register values are negative.
1183 */
1184 static void
1185 emit_kil(
1186 struct lp_build_tgsi_soa_context *bld,
1187 const struct tgsi_full_instruction *inst,
1188 int pc)
1189 {
1190 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1191 const struct tgsi_full_src_register *reg = &inst->Src[0];
1192 LLVMValueRef terms[TGSI_NUM_CHANNELS];
1193 LLVMValueRef mask;
1194 unsigned chan_index;
1195
1196 memset(&terms, 0, sizeof terms);
1197
1198 TGSI_FOR_EACH_CHANNEL( chan_index ) {
1199 unsigned swizzle;
1200
1201 /* Unswizzle channel */
1202 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1203
1204 /* Check if the component has not been already tested. */
1205 assert(swizzle < TGSI_NUM_CHANNELS);
1206 if( !terms[swizzle] )
1207 /* TODO: change the comparison operator instead of setting the sign */
1208 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
1209 }
1210
1211 mask = NULL;
1212 TGSI_FOR_EACH_CHANNEL( chan_index ) {
1213 if(terms[chan_index]) {
1214 LLVMValueRef chan_mask;
1215
1216 /*
1217 * If term < 0 then mask = 0 else mask = ~0.
1218 */
1219 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
1220
1221 if(mask)
1222 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1223 else
1224 mask = chan_mask;
1225 }
1226 }
1227
1228 if(mask) {
1229 lp_build_mask_update(bld->mask, mask);
1230
1231 if (!near_end_of_shader(bld, pc))
1232 lp_build_mask_check(bld->mask);
1233 }
1234 }
1235
1236
1237 /**
1238 * Predicated fragment kill.
1239 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1240 * The only predication is the execution mask which will apply if
1241 * we're inside a loop or conditional.
1242 */
1243 static void
1244 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1245 int pc)
1246 {
1247 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1248 LLVMValueRef mask;
1249
1250 /* For those channels which are "alive", disable fragment shader
1251 * execution.
1252 */
1253 if (bld->exec_mask.has_mask) {
1254 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1255 }
1256 else {
1257 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
1258 mask = zero;
1259 }
1260
1261 lp_build_mask_update(bld->mask, mask);
1262
1263 if (!near_end_of_shader(bld, pc))
1264 lp_build_mask_check(bld->mask);
1265 }
1266
1267
1268 /**
1269 * Emit code which will dump the value of all the temporary registers
1270 * to stdout.
1271 */
1272 static void
1273 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1274 {
1275 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1276 LLVMBuilderRef builder = gallivm->builder;
1277 LLVMValueRef temp_ptr;
1278 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1279 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1280 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1281 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1282 int index;
1283 int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
1284
1285 for (index = 0; index < n; index++) {
1286 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1287 LLVMValueRef v[4][4], res;
1288 int chan;
1289
1290 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1291
1292 for (chan = 0; chan < 4; chan++) {
1293 temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
1294 res = LLVMBuildLoad(builder, temp_ptr, "");
1295 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1296 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1297 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1298 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1299 }
1300
1301 lp_build_printf(gallivm, " X: %f %f %f %f\n",
1302 v[0][0], v[0][1], v[0][2], v[0][3]);
1303 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
1304 v[1][0], v[1][1], v[1][2], v[1][3]);
1305 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
1306 v[2][0], v[2][1], v[2][2], v[2][3]);
1307 lp_build_printf(gallivm, " W: %f %f %f %f\n",
1308 v[3][0], v[3][1], v[3][2], v[3][3]);
1309 }
1310 }
1311
1312
1313
1314 void
1315 lp_emit_declaration_soa(
1316 struct lp_build_tgsi_context *bld_base,
1317 const struct tgsi_full_declaration *decl)
1318 {
1319 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1320 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1321 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1322 const unsigned first = decl->Range.First;
1323 const unsigned last = decl->Range.Last;
1324 unsigned idx, i;
1325
1326 for (idx = first; idx <= last; ++idx) {
1327 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
1328 switch (decl->Declaration.File) {
1329 case TGSI_FILE_TEMPORARY:
1330 assert(idx < LP_MAX_TGSI_TEMPS);
1331 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1332 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1333 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1334 }
1335 break;
1336
1337 case TGSI_FILE_OUTPUT:
1338 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1339 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1340 bld->outputs[idx][i] = lp_build_alloca(gallivm,
1341 vec_type, "output");
1342 }
1343 break;
1344
1345 case TGSI_FILE_ADDRESS:
1346 assert(idx < LP_MAX_TGSI_ADDRS);
1347 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1348 bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr");
1349 break;
1350
1351 case TGSI_FILE_PREDICATE:
1352 assert(idx < LP_MAX_TGSI_PREDS);
1353 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1354 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1355 "predicate");
1356 break;
1357
1358 default:
1359 /* don't need to declare other vars */
1360 break;
1361 }
1362 }
1363 }
1364
1365
1366 void lp_emit_immediate_soa(
1367 struct lp_build_tgsi_context *bld_base,
1368 const struct tgsi_full_immediate *imm)
1369 {
1370 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1371 struct gallivm_state * gallivm = bld_base->base.gallivm;
1372
1373 /* simply copy the immediate values into the next immediates[] slot */
1374 unsigned i;
1375 const uint size = imm->Immediate.NrTokens - 1;
1376 assert(size <= 4);
1377 assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
1378
1379 for( i = 0; i < size; ++i )
1380 bld->immediates[bld->num_immediates][i] =
1381 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
1382
1383 for( i = size; i < 4; ++i )
1384 bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
1385
1386 bld->num_immediates++;
1387 }
1388
1389 static void
1390 ddx_emit(
1391 const struct lp_build_tgsi_action * action,
1392 struct lp_build_tgsi_context * bld_base,
1393 struct lp_build_emit_data * emit_data)
1394 {
1395 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1396
1397 emit_fetch_deriv(bld, emit_data->args[0], NULL,
1398 &emit_data->output[emit_data->chan], NULL);
1399 }
1400
1401 static void
1402 ddy_emit(
1403 const struct lp_build_tgsi_action * action,
1404 struct lp_build_tgsi_context * bld_base,
1405 struct lp_build_emit_data * emit_data)
1406 {
1407 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1408
1409 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
1410 &emit_data->output[emit_data->chan]);
1411 }
1412
1413 static void
1414 kilp_emit(
1415 const struct lp_build_tgsi_action * action,
1416 struct lp_build_tgsi_context * bld_base,
1417 struct lp_build_emit_data * emit_data)
1418 {
1419 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1420
1421 emit_kilp(bld, bld_base->pc - 1);
1422 }
1423
1424 static void
1425 kil_emit(
1426 const struct lp_build_tgsi_action * action,
1427 struct lp_build_tgsi_context * bld_base,
1428 struct lp_build_emit_data * emit_data)
1429 {
1430 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1431
1432 emit_kil(bld, emit_data->inst, bld_base->pc - 1);
1433 }
1434
1435 static void
1436 tex_emit(
1437 const struct lp_build_tgsi_action * action,
1438 struct lp_build_tgsi_context * bld_base,
1439 struct lp_build_emit_data * emit_data)
1440 {
1441 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1442
1443 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
1444 }
1445
1446 static void
1447 txb_emit(
1448 const struct lp_build_tgsi_action * action,
1449 struct lp_build_tgsi_context * bld_base,
1450 struct lp_build_emit_data * emit_data)
1451 {
1452 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1453
1454 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
1455 emit_data->output);
1456 }
1457
1458 static void
1459 txd_emit(
1460 const struct lp_build_tgsi_action * action,
1461 struct lp_build_tgsi_context * bld_base,
1462 struct lp_build_emit_data * emit_data)
1463 {
1464 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1465
1466 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
1467 emit_data->output);
1468 }
1469
1470 static void
1471 txl_emit(
1472 const struct lp_build_tgsi_action * action,
1473 struct lp_build_tgsi_context * bld_base,
1474 struct lp_build_emit_data * emit_data)
1475 {
1476 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1477
1478 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
1479 emit_data->output);
1480 }
1481
1482 static void
1483 txp_emit(
1484 const struct lp_build_tgsi_action * action,
1485 struct lp_build_tgsi_context * bld_base,
1486 struct lp_build_emit_data * emit_data)
1487 {
1488 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1489
1490 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
1491 emit_data->output);
1492 }
1493
1494 static void
1495 cal_emit(
1496 const struct lp_build_tgsi_action * action,
1497 struct lp_build_tgsi_context * bld_base,
1498 struct lp_build_emit_data * emit_data)
1499 {
1500 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1501
1502 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
1503 &bld_base->pc);
1504 }
1505
1506 static void
1507 ret_emit(
1508 const struct lp_build_tgsi_action * action,
1509 struct lp_build_tgsi_context * bld_base,
1510 struct lp_build_emit_data * emit_data)
1511 {
1512 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1513
1514 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
1515 }
1516
1517 static void
1518 brk_emit(
1519 const struct lp_build_tgsi_action * action,
1520 struct lp_build_tgsi_context * bld_base,
1521 struct lp_build_emit_data * emit_data)
1522 {
1523 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1524
1525 lp_exec_break(&bld->exec_mask);
1526 }
1527
1528 static void
1529 if_emit(
1530 const struct lp_build_tgsi_action * action,
1531 struct lp_build_tgsi_context * bld_base,
1532 struct lp_build_emit_data * emit_data)
1533 {
1534 LLVMValueRef tmp;
1535 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1536
1537 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
1538 emit_data->args[0], bld->bld_base.base.zero);
1539 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
1540 }
1541
1542 static void
1543 bgnloop_emit(
1544 const struct lp_build_tgsi_action * action,
1545 struct lp_build_tgsi_context * bld_base,
1546 struct lp_build_emit_data * emit_data)
1547 {
1548 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1549
1550 lp_exec_bgnloop(&bld->exec_mask);
1551 }
1552
1553 static void
1554 bgnsub_emit(
1555 const struct lp_build_tgsi_action * action,
1556 struct lp_build_tgsi_context * bld_base,
1557 struct lp_build_emit_data * emit_data)
1558 {
1559 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1560
1561 lp_exec_mask_bgnsub(&bld->exec_mask);
1562 }
1563
1564 static void
1565 else_emit(
1566 const struct lp_build_tgsi_action * action,
1567 struct lp_build_tgsi_context * bld_base,
1568 struct lp_build_emit_data * emit_data)
1569 {
1570 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1571
1572 lp_exec_mask_cond_invert(&bld->exec_mask);
1573 }
1574
1575 static void
1576 endif_emit(
1577 const struct lp_build_tgsi_action * action,
1578 struct lp_build_tgsi_context * bld_base,
1579 struct lp_build_emit_data * emit_data)
1580 {
1581 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1582
1583 lp_exec_mask_cond_pop(&bld->exec_mask);
1584 }
1585
1586 static void
1587 endloop_emit(
1588 const struct lp_build_tgsi_action * action,
1589 struct lp_build_tgsi_context * bld_base,
1590 struct lp_build_emit_data * emit_data)
1591 {
1592 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1593
1594 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1595 }
1596
1597 static void
1598 endsub_emit(
1599 const struct lp_build_tgsi_action * action,
1600 struct lp_build_tgsi_context * bld_base,
1601 struct lp_build_emit_data * emit_data)
1602 {
1603 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1604
1605 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
1606 }
1607
1608 static void
1609 cont_emit(
1610 const struct lp_build_tgsi_action * action,
1611 struct lp_build_tgsi_context * bld_base,
1612 struct lp_build_emit_data * emit_data)
1613 {
1614 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1615
1616 lp_exec_continue(&bld->exec_mask);
1617 }
1618
1619 /* XXX: Refactor and move it to lp_bld_tgsi_action.c
1620 *
1621 * XXX: What do the comments about xmm registers mean? Maybe they are left over
1622 * from old code, but there is no garauntee that LLVM will use those registers
1623 * for this code.
1624 *
1625 * XXX: There should be no calls to lp_build_emit_fetch in this function. This
1626 * should be handled by the emit_data->fetch_args function. */
1627 static void
1628 nrm_emit(
1629 const struct lp_build_tgsi_action * action,
1630 struct lp_build_tgsi_context * bld_base,
1631 struct lp_build_emit_data * emit_data)
1632 {
1633 LLVMValueRef tmp0, tmp1;
1634 LLVMValueRef tmp4 = NULL;
1635 LLVMValueRef tmp5 = NULL;
1636 LLVMValueRef tmp6 = NULL;
1637 LLVMValueRef tmp7 = NULL;
1638 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1639
1640 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1641
1642 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
1643 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
1644 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
1645 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
1646
1647 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1648
1649 /* xmm4 = src.x */
1650 /* xmm0 = src.x * src.x */
1651 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
1652 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
1653 tmp4 = tmp0;
1654 }
1655 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
1656
1657 /* xmm5 = src.y */
1658 /* xmm0 = xmm0 + src.y * src.y */
1659 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
1660 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
1661 tmp5 = tmp1;
1662 }
1663 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1664 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1665
1666 /* xmm6 = src.z */
1667 /* xmm0 = xmm0 + src.z * src.z */
1668 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
1669 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
1670 tmp6 = tmp1;
1671 }
1672 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1673 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1674
1675 if (dims == 4) {
1676 /* xmm7 = src.w */
1677 /* xmm0 = xmm0 + src.w * src.w */
1678 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
1679 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
1680 tmp7 = tmp1;
1681 }
1682 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1683 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1684 }
1685 /* xmm1 = 1 / sqrt(xmm0) */
1686 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
1687 /* dst.x = xmm1 * src.x */
1688 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
1689 emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
1690 }
1691 /* dst.y = xmm1 * src.y */
1692 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
1693 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
1694 }
1695
1696 /* dst.z = xmm1 * src.z */
1697 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
1698 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
1699 }
1700 /* dst.w = xmm1 * src.w */
1701 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
1702 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
1703 }
1704 }
1705
1706 /* dst.w = 1.0 */
1707 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
1708 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
1709 }
1710 }
1711
1712 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
1713 {
1714 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1715 struct gallivm_state * gallivm = bld_base->base.gallivm;
1716
1717 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
1718 LLVMValueRef array_size =
1719 lp_build_const_int32(gallivm,
1720 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
1721 bld->temps_array = lp_build_array_alloca(gallivm,
1722 bld_base->base.vec_type, array_size,
1723 "temp_array");
1724 }
1725
1726 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
1727 LLVMValueRef array_size =
1728 lp_build_const_int32(gallivm,
1729 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
1730 bld->outputs_array = lp_build_array_alloca(gallivm,
1731 bld_base->base.vec_type, array_size,
1732 "output_array");
1733 }
1734
1735 /* If we have indirect addressing in inputs we need to copy them into
1736 * our alloca array to be able to iterate over them */
1737 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1738 unsigned index, chan;
1739 LLVMTypeRef vec_type = bld_base->base.vec_type;
1740 LLVMValueRef array_size = lp_build_const_int32(gallivm,
1741 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
1742 bld->inputs_array = lp_build_array_alloca(gallivm,
1743 vec_type, array_size,
1744 "input_array");
1745
1746 assert(bld_base->info->num_inputs
1747 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
1748
1749 for (index = 0; index < bld_base->info->num_inputs; ++index) {
1750 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1751 LLVMValueRef lindex =
1752 lp_build_const_int32(gallivm, index * 4 + chan);
1753 LLVMValueRef input_ptr =
1754 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
1755 &lindex, 1, "");
1756 LLVMValueRef value = bld->inputs[index][chan];
1757 if (value)
1758 LLVMBuildStore(gallivm->builder, value, input_ptr);
1759 }
1760 }
1761 }
1762 }
1763
1764 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
1765 {
1766 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1767
1768 if (0) {
1769 /* for debugging */
1770 emit_dump_temps(bld);
1771 }
1772
1773 /* If we have indirect addressing in outputs we need to copy our alloca array
1774 * to the outputs slots specified by the called */
1775 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
1776 unsigned index, chan;
1777 assert(bld_base->info->num_outputs <=
1778 bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1);
1779 for (index = 0; index < bld_base->info->num_outputs; ++index) {
1780 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1781 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
1782 }
1783 }
1784 }
1785 }
1786
1787 void
1788 lp_build_tgsi_soa(struct gallivm_state *gallivm,
1789 const struct tgsi_token *tokens,
1790 struct lp_type type,
1791 struct lp_build_mask_context *mask,
1792 LLVMValueRef consts_ptr,
1793 LLVMValueRef system_values_array,
1794 const LLVMValueRef *pos,
1795 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
1796 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1797 struct lp_build_sampler_soa *sampler,
1798 const struct tgsi_shader_info *info)
1799 {
1800 struct lp_build_tgsi_soa_context bld;
1801
1802 struct lp_type res_type;
1803
1804 assert(type.length <= LP_MAX_VECTOR_LENGTH);
1805 memset(&res_type, 0, sizeof res_type);
1806 res_type.width = type.width;
1807 res_type.length = type.length;
1808 res_type.sign = 1;
1809
1810 /* Setup build context */
1811 memset(&bld, 0, sizeof bld);
1812 lp_build_context_init(&bld.bld_base.base, gallivm, type);
1813 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
1814 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
1815 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
1816 bld.mask = mask;
1817 bld.pos = pos;
1818 bld.inputs = inputs;
1819 bld.outputs = outputs;
1820 bld.consts_ptr = consts_ptr;
1821 bld.sampler = sampler;
1822 bld.bld_base.info = info;
1823 bld.indirect_files = info->indirect_files;
1824
1825 bld.bld_base.soa = TRUE;
1826 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
1827 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1828 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1829 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1830 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
1831 bld.bld_base.emit_store = emit_store;
1832
1833 bld.bld_base.emit_declaration = lp_emit_declaration_soa;
1834 bld.bld_base.emit_immediate = lp_emit_immediate_soa;
1835
1836 bld.bld_base.emit_prologue = emit_prologue;
1837 bld.bld_base.emit_epilogue = emit_epilogue;
1838
1839 /* Set opcode actions */
1840 lp_set_default_actions_cpu(&bld.bld_base);
1841
1842 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1843 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
1844 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1845 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
1846 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1847 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
1848 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
1849 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1850 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1851 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1852 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
1853 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
1854 bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
1855 bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
1856 bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
1857 bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
1858 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
1859 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
1860 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
1861 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
1862 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
1863 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
1864
1865 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
1866
1867
1868 bld.system_values_array = system_values_array;
1869
1870 lp_build_tgsi_llvm(&bld.bld_base, tokens);
1871
1872 if (0) {
1873 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1874 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1875 debug_printf("11111111111111111111111111111 \n");
1876 tgsi_dump(tokens, 0);
1877 lp_debug_dump_value(function);
1878 debug_printf("2222222222222222222222222222 \n");
1879 }
1880
1881 if (0) {
1882 LLVMModuleRef module = LLVMGetGlobalParent(
1883 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1884 LLVMDumpModule(module);
1885
1886 }
1887 }
1888
1889
1890 /**
1891 * Build up the system values array out of individual values such as
1892 * the instance ID, front-face, primitive ID, etc. The shader info is
1893 * used to determine which system values are needed and where to put
1894 * them in the system values array.
1895 *
1896 * XXX only instance ID is implemented at this time.
1897 *
1898 * The system values register file is similar to the constants buffer.
1899 * Example declaration:
1900 * DCL SV[0], INSTANCEID
1901 * Example instruction:
1902 * MOVE foo, SV[0].xxxx;
1903 *
1904 * \return LLVM float array (interpreted as float [][4])
1905 */
1906 LLVMValueRef
1907 lp_build_system_values_array(struct gallivm_state *gallivm,
1908 const struct tgsi_shader_info *info,
1909 LLVMValueRef instance_id,
1910 LLVMValueRef facing)
1911 {
1912 LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values);
1913 LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context);
1914 LLVMValueRef array = lp_build_array_alloca(gallivm, float_t,
1915 size, "sysvals_array");
1916 unsigned i;
1917
1918 for (i = 0; i < info->num_system_values; i++) {
1919 LLVMValueRef index = lp_build_const_int32(gallivm, i * 4);
1920 LLVMValueRef ptr, value = 0;
1921
1922 switch (info->system_value_semantic_name[i]) {
1923 case TGSI_SEMANTIC_INSTANCEID:
1924 /* convert instance ID from int to float */
1925 value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t,
1926 "sysval_instanceid");
1927 break;
1928 case TGSI_SEMANTIC_FACE:
1929 /* fall-through */
1930 default:
1931 assert(0 && "unexpected semantic in build_system_values_array()");
1932 }
1933
1934 ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, "");
1935 LLVMBuildStore(gallivm->builder, value, ptr);
1936 }
1937
1938 return array;
1939 }