6a10af3ae4444a92fd6b08012ab7f9f4d7dd3c12
[mesa.git] / src / gallium / drivers / radeon / radeon_setup_tgsi_llvm.c
1 /*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Tom Stellard <thomas.stellard@amd.com>
24 *
25 */
26 #include "radeon_llvm.h"
27
28 #include "gallivm/lp_bld_const.h"
29 #include "gallivm/lp_bld_gather.h"
30 #include "gallivm/lp_bld_flow.h"
31 #include "gallivm/lp_bld_init.h"
32 #include "gallivm/lp_bld_intr.h"
33 #include "gallivm/lp_bld_misc.h"
34 #include "gallivm/lp_bld_swizzle.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "util/u_math.h"
38 #include "util/u_memory.h"
39 #include "util/u_debug.h"
40
41 #include <stdio.h>
42 #include <llvm-c/Core.h>
43 #include <llvm-c/Transforms/Scalar.h>
44
/* Data for if/else/endif and bgnloop/endloop control flow structures.
 */
struct radeon_llvm_flow {
	/* Loop exit or next part of if/else/endif. */
	LLVMBasicBlockRef next_block;
	/* Only set for loops; used by get_innermost_loop() to distinguish
	 * loop entries from if/else entries on the flow stack. */
	LLVMBasicBlockRef loop_entry_block;
};
52
53 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
54 enum tgsi_opcode_type type)
55 {
56 LLVMContextRef ctx = bld_base->base.gallivm->context;
57
58 switch (type) {
59 case TGSI_TYPE_UNSIGNED:
60 case TGSI_TYPE_SIGNED:
61 return LLVMInt32TypeInContext(ctx);
62 case TGSI_TYPE_UNSIGNED64:
63 case TGSI_TYPE_SIGNED64:
64 return LLVMInt64TypeInContext(ctx);
65 case TGSI_TYPE_DOUBLE:
66 return LLVMDoubleTypeInContext(ctx);
67 case TGSI_TYPE_UNTYPED:
68 case TGSI_TYPE_FLOAT:
69 return LLVMFloatTypeInContext(ctx);
70 default: break;
71 }
72 return 0;
73 }
74
75 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
76 enum tgsi_opcode_type type, LLVMValueRef value)
77 {
78 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
79 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
80
81 if (dst_type)
82 return LLVMBuildBitCast(builder, value, dst_type, "");
83 else
84 return value;
85 }
86
87 /**
88 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
89 * or an undefined value in the same interval otherwise.
90 */
91 LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx,
92 LLVMValueRef index,
93 unsigned num)
94 {
95 struct gallivm_state *gallivm = &ctx->gallivm;
96 LLVMBuilderRef builder = gallivm->builder;
97 LLVMValueRef c_max = lp_build_const_int32(gallivm, num - 1);
98 LLVMValueRef cc;
99
100 if (util_is_power_of_two(num)) {
101 index = LLVMBuildAnd(builder, index, c_max, "");
102 } else {
103 /* In theory, this MAX pattern should result in code that is
104 * as good as the bit-wise AND above.
105 *
106 * In practice, LLVM generates worse code (at the time of
107 * writing), because its value tracking is not strong enough.
108 */
109 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
110 index = LLVMBuildSelect(builder, cc, index, c_max, "");
111 }
112
113 return index;
114 }
115
116 static struct radeon_llvm_flow *
117 get_current_flow(struct radeon_llvm_context *ctx)
118 {
119 if (ctx->flow_depth > 0)
120 return &ctx->flow[ctx->flow_depth - 1];
121 return NULL;
122 }
123
124 static struct radeon_llvm_flow *
125 get_innermost_loop(struct radeon_llvm_context *ctx)
126 {
127 for (unsigned i = ctx->flow_depth; i > 0; --i) {
128 if (ctx->flow[i - 1].loop_entry_block)
129 return &ctx->flow[i - 1];
130 }
131 return NULL;
132 }
133
/* Push a new entry onto the control-flow stack, growing the backing array
 * (doubling, starting at RADEON_LLVM_INITIAL_CF_DEPTH) when full. The
 * returned entry has both block pointers cleared; the caller fills them in.
 * NOTE(review): the REALLOC result is not checked — on allocation failure
 * the NULL pointer would be dereferenced below; confirm intended policy. */
static struct radeon_llvm_flow *
push_flow(struct radeon_llvm_context *ctx)
{
	struct radeon_llvm_flow *flow;

	if (ctx->flow_depth >= ctx->flow_depth_max) {
		unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
		ctx->flow = REALLOC(ctx->flow,
				    ctx->flow_depth_max * sizeof(*ctx->flow),
				    new_max * sizeof(*ctx->flow));
		ctx->flow_depth_max = new_max;
	}

	flow = &ctx->flow[ctx->flow_depth];
	ctx->flow_depth++;

	flow->next_block = NULL;
	flow->loop_entry_block = NULL;
	return flow;
}
154
/* Flatten a (register index, channel) pair into the per-channel SoA
 * register numbering: four channels per register. */
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	return chan + index * 4;
}
159
160 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
161 LLVMValueRef value,
162 unsigned swizzle_x,
163 unsigned swizzle_y,
164 unsigned swizzle_z,
165 unsigned swizzle_w)
166 {
167 LLVMValueRef swizzles[4];
168 LLVMTypeRef i32t =
169 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
170
171 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
172 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
173 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
174 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
175
176 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
177 value,
178 LLVMGetUndef(LLVMTypeOf(value)),
179 LLVMConstVector(swizzles, 4), "");
180 }
181
182 /**
183 * Return the description of the array covering the given temporary register
184 * index.
185 */
186 static unsigned
187 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
188 unsigned reg_index,
189 const struct tgsi_ind_register *reg)
190 {
191 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
192 unsigned num_arrays = ctx->soa.bld_base.info->array_max[TGSI_FILE_TEMPORARY];
193 unsigned i;
194
195 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
196 return reg->ArrayID;
197
198 for (i = 0; i < num_arrays; i++) {
199 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
200
201 if (reg_index >= array->range.First && reg_index <= array->range.Last)
202 return i + 1;
203 }
204
205 return 0;
206 }
207
208 static struct tgsi_declaration_range
209 get_array_range(struct lp_build_tgsi_context *bld_base,
210 unsigned File, unsigned reg_index,
211 const struct tgsi_ind_register *reg)
212 {
213 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
214 struct tgsi_declaration_range range;
215
216 if (File == TGSI_FILE_TEMPORARY) {
217 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
218 if (array_id)
219 return ctx->temp_arrays[array_id - 1].range;
220 }
221
222 range.First = 0;
223 range.Last = bld_base->info->file_max[File];
224 return range;
225 }
226
227 static LLVMValueRef
228 emit_array_index(struct lp_build_tgsi_soa_context *bld,
229 const struct tgsi_ind_register *reg,
230 unsigned offset)
231 {
232 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
233
234 if (!reg) {
235 return lp_build_const_int32(gallivm, offset);
236 }
237 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
238 return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
239 }
240
/**
 * For indirect registers, construct a pointer directly to the requested
 * element using getelementptr if possible.
 *
 * Returns NULL if the insertelement/extractelement fallback for array access
 * must be used.
 */
static LLVMValueRef
get_pointer_into_array(struct radeon_llvm_context *ctx,
		       unsigned file,
		       unsigned swizzle,
		       unsigned reg_index,
		       const struct tgsi_ind_register *reg_indirect)
{
	unsigned array_id;
	struct tgsi_array_info *array;
	struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef idxs[2];
	LLVMValueRef index;
	LLVMValueRef alloca;

	/* Only temporaries can be backed by a spanning array alloca. */
	if (file != TGSI_FILE_TEMPORARY)
		return NULL;

	array_id = get_temp_array_id(&ctx->soa.bld_base, reg_index, reg_indirect);
	if (!array_id)
		return NULL;

	/* Small arrays use per-element allocas instead (see emit_declaration)
	 * and have no spanning alloca here. */
	alloca = ctx->temp_array_allocas[array_id - 1];
	if (!alloca)
		return NULL;

	array = &ctx->temp_arrays[array_id - 1];

	/* Reads from a channel the shader never writes use the shared dummy
	 * alloca, which is always safe to load from. */
	if (!(array->writemask & (1 << swizzle)))
		return ctx->undef_alloca;

	index = emit_array_index(&ctx->soa, reg_indirect,
				 reg_index - ctx->temp_arrays[array_id - 1].range.First);

	/* Ensure that the index is within a valid range, to guard against
	 * VM faults and overwriting critical data (e.g. spilled resource
	 * descriptors).
	 *
	 * TODO It should be possible to avoid the additional instructions
	 * if LLVM is changed so that it guarantuees:
	 * 1. the scratch space descriptor isolates the current wave (this
	 *    could even save the scratch offset SGPR at the cost of an
	 *    additional SALU instruction)
	 * 2. the memory for allocas must be allocated at the _end_ of the
	 *    scratch space (after spilled registers)
	 */
	index = radeon_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);

	/* The alloca only holds written channels: scale by the number of
	 * written channels and add the rank of this channel among them. */
	index = LLVMBuildMul(
		builder, index,
		lp_build_const_int32(gallivm, util_bitcount(array->writemask)),
		"");
	index = LLVMBuildAdd(
		builder, index,
		lp_build_const_int32(
			gallivm,
			util_bitcount(array->writemask & ((1 << swizzle) - 1))),
		"");
	idxs[0] = ctx->soa.bld_base.uint_bld.zero;
	idxs[1] = index;
	return LLVMBuildGEP(builder, alloca, idxs, 2, "");
}
310
/* Assemble a 64-bit value of the given TGSI \p type from its two 32-bit
 * halves \p ptr (low) and \p ptr2 (high): both are packed as i32 into a
 * double-length integer vector which is then bitcast to the target type. */
LLVMValueRef
radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
			     enum tgsi_opcode_type type,
			     LLVMValueRef ptr,
			     LLVMValueRef ptr2)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMValueRef result;

	result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));

	result = LLVMBuildInsertElement(builder,
					result,
					bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
					bld_base->int_bld.zero, "");
	result = LLVMBuildInsertElement(builder,
					result,
					bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
					bld_base->int_bld.one, "");
	return bitcast(bld_base, type, result);
}
332
/* Fetch an entire register range as one LLVM vector (one element per
 * register) by fetching each register's \p swizzle channel individually. */
static LLVMValueRef
emit_array_fetch(struct lp_build_tgsi_context *bld_base,
		 unsigned File, enum tgsi_opcode_type type,
		 struct tgsi_declaration_range range,
		 unsigned swizzle)
{
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;

	unsigned i, size = range.Last - range.First + 1;
	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
	LLVMValueRef result = LLVMGetUndef(vec);

	/* Synthesize a direct (non-indirect) source register per element. */
	struct tgsi_full_src_register tmp_reg = {};
	tmp_reg.Register.File = File;

	for (i = 0; i < size; ++i) {
		tmp_reg.Register.Index = i + range.First;
		LLVMValueRef temp = radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
		result = LLVMBuildInsertElement(builder, result, temp,
						lp_build_const_int32(gallivm, i), "array_vector");
	}
	return result;
}
358
/* Load one channel of an indirectly addressed register. Uses a direct
 * pointer into the array alloca when one exists; otherwise falls back to
 * fetching the whole range as a vector and extracting the element. */
static LLVMValueRef
load_value_from_array(struct lp_build_tgsi_context *bld_base,
		      unsigned file,
		      enum tgsi_opcode_type type,
		      unsigned swizzle,
		      unsigned reg_index,
		      const struct tgsi_ind_register *reg_indirect)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
	if (ptr) {
		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
		if (tgsi_type_is_64bit(type)) {
			/* A 64-bit value occupies two consecutive slots. */
			LLVMValueRef ptr_hi, val_hi;
			ptr_hi = LLVMBuildGEP(builder, ptr, &bld_base->uint_bld.one, 1, "");
			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
			val = radeon_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
		}

		return val;
	} else {
		struct tgsi_declaration_range range =
			get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index =
			emit_array_index(bld, reg_indirect, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, type, range, swizzle);
		return LLVMBuildExtractElement(builder, array, index, "");
	}
}
394
/* Store one channel of an indirectly addressed register. Writes through a
 * direct pointer when an array alloca exists; otherwise read-modify-writes
 * the whole range via a vector insert and per-register stores. */
static void
store_value_to_array(struct lp_build_tgsi_context *bld_base,
		     LLVMValueRef value,
		     unsigned file,
		     unsigned chan_index,
		     unsigned reg_index,
		     const struct tgsi_ind_register *reg_indirect)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
	if (ptr) {
		LLVMBuildStore(builder, value, ptr);
	} else {
		unsigned i, size;
		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
		LLVMValueRef temp_ptr;

		/* Insert the new value at the dynamic index, then write every
		 * element of the range back to its register slot. */
		array = LLVMBuildInsertElement(builder, array, value, index, "");

		size = range.Last - range.First + 1;
		for (i = 0; i < size; ++i) {
			switch(file) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = bld->outputs[i + range.First][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				if (range.First + i >= ctx->temps_count)
					continue;
				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
				break;

			default:
				continue;
			}
			value = LLVMBuildExtractElement(builder, array,
							lp_build_const_int32(gallivm, i), "");
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
}
444
/* Fetch one channel (or, with swizzle == ~0, all four channels gathered
 * into a vector) of a TGSI source register as a value of \p type.
 * 64-bit types read two consecutive channels starting at \p swizzle. */
LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				    const struct tgsi_full_src_register *reg,
				    enum tgsi_opcode_type type,
				    unsigned swizzle)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMValueRef result = NULL, ptr, ptr2;

	/* swizzle == ~0: fetch each channel recursively and gather. */
	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(bld_base->base.gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	if (reg->Register.Indirect) {
		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
							  swizzle, reg->Register.Index, &reg->Indirect);
		return bitcast(bld_base, type, load);
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		if (tgsi_type_is_64bit(type)) {
			/* Pack the two 32-bit immediate halves as constants. */
			result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
			result = LLVMConstInsertElement(result,
							bld->immediates[reg->Register.Index][swizzle],
							bld_base->int_bld.zero);
			result = LLVMConstInsertElement(result,
							bld->immediates[reg->Register.Index][swizzle + 1],
							bld_base->int_bld.one);
			return LLVMConstBitCast(result, ctype);
		} else {
			return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
		}
	}

	case TGSI_FILE_INPUT: {
		unsigned index = reg->Register.Index;
		LLVMValueRef input[4];

		/* I don't think doing this for vertex shaders is beneficial.
		 * For those, we want to make sure the VMEM loads are executed
		 * only once. Fragment shaders don't care much, because
		 * v_interp instructions are much cheaper than VMEM loads.
		 */
		if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
		else
			memcpy(input, &ctx->inputs[index * 4], sizeof(input));

		result = input[swizzle];

		if (tgsi_type_is_64bit(type)) {
			ptr = result;
			ptr2 = input[swizzle + 1];
			return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
		/* Out-of-range temporaries read as undef. */
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		if (tgsi_type_is_64bit(type)) {
			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
			return radeon_llvm_emit_fetch_64bit(bld_base, type,
							    LLVMBuildLoad(builder, ptr, ""),
							    LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
		if (tgsi_type_is_64bit(type)) {
			ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
			return radeon_llvm_emit_fetch_64bit(bld_base, type,
							    LLVMBuildLoad(builder, ptr, ""),
							    LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}
542
543 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
544 const struct tgsi_full_src_register *reg,
545 enum tgsi_opcode_type type,
546 unsigned swizzle)
547 {
548 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
549 struct gallivm_state *gallivm = bld_base->base.gallivm;
550
551 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
552 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
553 cval = LLVMBuildExtractElement(gallivm->builder, cval,
554 lp_build_const_int32(gallivm, swizzle), "");
555 }
556 return bitcast(bld_base, type, cval);
557 }
558
/* Handle a TGSI declaration: create allocas for address, temporary and
 * output registers, record/preload inputs, load system values, and
 * declare memory regions. */
static void emit_declaration(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_declaration *decl)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	unsigned first, last, i;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		/* One alloca per channel of each address register. */
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				ctx->soa.addr[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->soa.bld_base.uint_bld.elem_type, "");
			}
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
	{
		char name[16] = "";
		LLVMValueRef array_alloca = NULL;
		unsigned decl_size;
		unsigned writemask = decl->Declaration.UsageMask;
		first = decl->Range.First;
		last = decl->Range.Last;
		decl_size = 4 * ((last - first) + 1);

		if (decl->Declaration.Array) {
			unsigned id = decl->Array.ArrayID - 1;
			unsigned array_size;

			writemask &= ctx->temp_arrays[id].writemask;
			ctx->temp_arrays[id].writemask = writemask;
			array_size = ((last - first) + 1) * util_bitcount(writemask);

			/* If the array has more than 16 elements, store it
			 * in memory using an alloca that spans the entire
			 * array.
			 *
			 * Otherwise, store each array element individually.
			 * We will then generate vectors (per-channel, up to
			 * <16 x float> if the usagemask is a single bit) for
			 * indirect addressing.
			 *
			 * Note that 16 is the number of vector elements that
			 * LLVM will store in a register, so theoretically an
			 * array with up to 4 * 16 = 64 elements could be
			 * handled this way, but whether that's a good idea
			 * depends on VGPR register pressure elsewhere.
			 *
			 * FIXME: We shouldn't need to have the non-alloca
			 * code path for arrays. LLVM should be smart enough to
			 * promote allocas into registers when profitable.
			 *
			 * LLVM 3.8 crashes with this.
			 */
			if (HAVE_LLVM >= 0x0309 && array_size > 16) {
				array_alloca = LLVMBuildAlloca(builder,
					LLVMArrayType(bld_base->base.vec_type,
						      array_size), "array");
				ctx->temp_array_allocas[id] = array_alloca;
			}
		}

		/* Lazily allocate the flat per-channel temp pointer table. */
		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		if (!array_alloca) {
			/* Per-element allocas; named only in debug builds. */
			for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "TEMP%d.%c",
					 first + i / 4, "xyzw"[i % 4]);
#endif
				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
					lp_build_alloca_undef(bld_base->base.gallivm,
							      bld_base->base.vec_type,
							      name);
			}
		} else {
			LLVMValueRef idxs[2] = {
				bld_base->uint_bld.zero,
				NULL
			};
			unsigned j = 0;

			if (writemask != TGSI_WRITEMASK_XYZW &&
			    !ctx->undef_alloca) {
				/* Create a dummy alloca. We use it so that we
				 * have a pointer that is safe to load from if
				 * a shader ever reads from a channel that
				 * it never writes to.
				 */
				ctx->undef_alloca = lp_build_alloca_undef(
					bld_base->base.gallivm,
					bld_base->base.vec_type, "undef");
			}

			/* Point written channels into the spanning alloca;
			 * unwritten channels read the shared dummy alloca. */
			for (i = 0; i < decl_size; ++i) {
				LLVMValueRef ptr;
				if (writemask & (1 << (i % 4))) {
#ifdef DEBUG
					snprintf(name, sizeof(name), "TEMP%d.%c",
						 first + i / 4, "xyzw"[i % 4]);
#endif
					idxs[1] = lp_build_const_int32(bld_base->base.gallivm, j);
					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
					j++;
				} else {
					ptr = ctx->undef_alloca;
				}
				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
			}
		}
		break;
	}
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			if (ctx->load_input) {
				ctx->input_decls[idx] = *decl;

				/* Fragment shader inputs are instead loaded
				 * lazily in radeon_llvm_emit_fetch. */
				if (bld_base->info->processor != PIPE_SHADER_FRAGMENT)
					ctx->load_input(ctx, idx, decl,
							&ctx->inputs[idx * 4]);
			}
		}
	}
	break;

	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			ctx->load_system_value(ctx, idx, decl);
		}
	}
	break;

	case TGSI_FILE_OUTPUT:
	{
		/* One alloca per channel of each output register. */
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				ctx->soa.outputs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->soa.bld_base.base.elem_type, "");
			}
		}
		break;
	}

	case TGSI_FILE_MEMORY:
		ctx->declare_memory_region(ctx, decl);
		break;

	default:
		break;
	}
}
726
727 LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
728 LLVMValueRef value)
729 {
730 struct lp_build_emit_data clamp_emit_data;
731
732 memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
733 clamp_emit_data.arg_count = 3;
734 clamp_emit_data.args[0] = value;
735 clamp_emit_data.args[2] = bld_base->base.one;
736 clamp_emit_data.args[1] = bld_base->base.zero;
737
738 return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
739 &clamp_emit_data);
740 }
741
/* Store the per-channel values in \p dst to the instruction's first
 * destination register, applying saturate and handling vector-typed
 * dst[0], 64-bit destination types and indirect addressing. */
void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
			    const struct tgsi_full_instruction *inst,
			    const struct tgsi_opcode_info *info,
			    LLVMValueRef dst[4])
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
	LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
	unsigned chan, chan_index;
	bool is_vec_store = false;
	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);

	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	if (is_vec_store) {
		/* Scatter the vector into per-channel values and recurse. */
		LLVMValueRef values[4] = {};
		TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
			LLVMValueRef index = lp_build_const_int32(gallivm, chan);
			values[chan] = LLVMBuildExtractElement(gallivm->builder,
							       dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, values);
		return;
	}

	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
		LLVMValueRef value = dst[chan_index];

		/* 64-bit values span two channels; the odd channel is
		 * handled together with the preceding even one. */
		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
			continue;
		if (inst->Instruction.Saturate)
			value = radeon_llvm_saturate(bld_base, value);

		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = bld->addr[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}

		/* Register slots hold floats; bitcast 32-bit values. */
		if (!tgsi_type_is_64bit(dtype))
			value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);

		if (reg->Register.Indirect) {
			unsigned file = reg->Register.File;
			unsigned reg_index = reg->Register.Index;
			store_value_to_array(bld_base, value, file, chan_index,
					     reg_index, &reg->Indirect);
		} else {
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = bld->outputs[reg->Register.Index][chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
				break;

			case TGSI_FILE_TEMPORARY:
			{
				/* Silently drop out-of-range temp writes. */
				if (reg->Register.Index >= ctx->temps_count)
					continue;

				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];

				break;
			}
			default:
				return;
			}
			if (!tgsi_type_is_64bit(dtype))
				LLVMBuildStore(builder, value, temp_ptr);
			else {
				/* Split the 64-bit value into two 32-bit
				 * halves and store them separately. */
				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
								    LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
				LLVMValueRef val2;
				value = LLVMBuildExtractElement(builder, ptr,
								bld_base->uint_bld.zero, "");
				val2 = LLVMBuildExtractElement(builder, ptr,
							       bld_base->uint_bld.one, "");

				LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
				LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
			}
		}
	}
}
834
835 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
836 {
837 char buf[32];
838 /* Subtract 1 so that the number shown is that of the corresponding
839 * opcode in the TGSI dump, e.g. an if block has the same suffix as
840 * the instruction number of the corresponding TGSI IF.
841 */
842 snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
843 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
844 }
845
846 /* Append a basic block at the level of the parent flow.
847 */
848 static LLVMBasicBlockRef append_basic_block(struct radeon_llvm_context *ctx,
849 const char *name)
850 {
851 struct gallivm_state *gallivm = &ctx->gallivm;
852
853 assert(ctx->flow_depth >= 1);
854
855 if (ctx->flow_depth >= 2) {
856 struct radeon_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
857
858 return LLVMInsertBasicBlockInContext(gallivm->context,
859 flow->next_block, name);
860 }
861
862 return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
863 }
864
865 /* Emit a branch to the given default target for the current block if
866 * applicable -- that is, if the current block does not already contain a
867 * branch from a break or continue.
868 */
869 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
870 {
871 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
872 LLVMBuildBr(builder, target);
873 }
874
/* BGNLOOP: push a loop onto the flow stack, create its header and exit
 * blocks, branch into the header and continue emitting there. */
static void bgnloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *flow = push_flow(ctx);
	flow->loop_entry_block = append_basic_block(ctx, "LOOP");
	flow->next_block = append_basic_block(ctx, "ENDLOOP");
	set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
	LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
	LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
}
888
/* BRK: unconditionally branch to the exit block of the innermost loop.
 * Assumes valid TGSI, i.e. BRK only appears inside a loop (flow != NULL). */
static void brk_emit(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *flow = get_innermost_loop(ctx);

	LLVMBuildBr(gallivm->builder, flow->next_block);
}
899
/* CONT: unconditionally branch back to the header of the innermost loop.
 * Assumes valid TGSI, i.e. CONT only appears inside a loop (flow != NULL). */
static void cont_emit(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *flow = get_innermost_loop(ctx);

	LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
}
910
/* ELSE: close the if-block with a branch to a fresh ENDIF block, then
 * continue emitting into the else block created by if_cond_emit; the
 * flow entry's next_block is retargeted to the new ENDIF block. */
static void else_emit(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
	LLVMBasicBlockRef endif_block;

	assert(!current_branch->loop_entry_block);

	endif_block = append_basic_block(ctx, "ENDIF");
	emit_default_branch(gallivm->builder, endif_block);

	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
	set_basicblock_name(current_branch->next_block, "else", bld_base->pc);

	current_branch->next_block = endif_block;
}
930
/* ENDIF: fall through into the merge block (unless already terminated by
 * a break/continue), continue emitting there, and pop the flow stack. */
static void endif_emit(const struct lp_build_tgsi_action *action,
		       struct lp_build_tgsi_context *bld_base,
		       struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *current_branch = get_current_flow(ctx);

	assert(!current_branch->loop_entry_block);

	emit_default_branch(gallivm->builder, current_branch->next_block);
	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
	set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);

	ctx->flow_depth--;
}
947
/* ENDLOOP: branch back to the loop header (unless already terminated by a
 * break/continue), continue emitting in the exit block, and pop the flow
 * stack. */
static void endloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *current_loop = get_current_flow(ctx);

	assert(current_loop->loop_entry_block);

	emit_default_branch(gallivm->builder, current_loop->loop_entry_block);

	LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
	set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
	ctx->flow_depth--;
}
964
/* Common tail for IF/UIF: open a new conditional on a ready-made i1 cond.
 *
 * Pushes a flow-stack entry, creates the IF block (true arm) and an ELSE
 * block that doubles as the pending merge target until else_emit replaces
 * it, then leaves the builder positioned in the IF block.
 */
static void if_cond_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data,
			 LLVMValueRef cond)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *flow = push_flow(ctx);
	LLVMBasicBlockRef if_block;

	if_block = append_basic_block(ctx, "IF");
	flow->next_block = append_basic_block(ctx, "ELSE");
	set_basicblock_name(if_block, "if", bld_base->pc);
	LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
	LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
}
981
982 static void if_emit(const struct lp_build_tgsi_action *action,
983 struct lp_build_tgsi_context *bld_base,
984 struct lp_build_emit_data *emit_data)
985 {
986 struct gallivm_state *gallivm = bld_base->base.gallivm;
987 LLVMValueRef cond;
988
989 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
990 emit_data->args[0],
991 bld_base->base.zero, "");
992
993 if_cond_emit(action, bld_base, emit_data, cond);
994 }
995
996 static void uif_emit(const struct lp_build_tgsi_action *action,
997 struct lp_build_tgsi_context *bld_base,
998 struct lp_build_emit_data *emit_data)
999 {
1000 struct gallivm_state *gallivm = bld_base->base.gallivm;
1001 LLVMValueRef cond;
1002
1003 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1004 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1005 bld_base->int_bld.zero, "");
1006
1007 if_cond_emit(action, bld_base, emit_data, cond);
1008 }
1009
1010 static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
1011 struct lp_build_emit_data *emit_data)
1012 {
1013 const struct tgsi_full_instruction *inst = emit_data->inst;
1014 struct gallivm_state *gallivm = bld_base->base.gallivm;
1015 LLVMBuilderRef builder = gallivm->builder;
1016 unsigned i;
1017 LLVMValueRef conds[TGSI_NUM_CHANNELS];
1018
1019 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1020 LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
1021 conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
1022 bld_base->base.zero, "");
1023 }
1024
1025 /* Or the conditions together */
1026 for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
1027 conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
1028 }
1029
1030 emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
1031 emit_data->arg_count = 1;
1032 emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
1033 lp_build_const_float(gallivm, -1.0f),
1034 bld_base->base.zero, "");
1035 }
1036
1037 static void kil_emit(const struct lp_build_tgsi_action *action,
1038 struct lp_build_tgsi_context *bld_base,
1039 struct lp_build_emit_data *emit_data)
1040 {
1041 unsigned i;
1042 for (i = 0; i < emit_data->arg_count; i++) {
1043 emit_data->output[i] = lp_build_intrinsic_unary(
1044 bld_base->base.gallivm->builder,
1045 action->intr_name,
1046 emit_data->dst_type, emit_data->args[i]);
1047 }
1048 }
1049
/* Run the hardware v_cubetc/sc/ma/id computation on 3 cube-map direction
 * components and gather the results into a 4-element vector
 * <tc, sc, ma, face_id>.
 *
 * LLVM >= 3.9 exposes the four scalar amdgcn intrinsics separately; older
 * LLVM has a single llvm.AMDGPU.cube taking and returning a v4f32 (the
 * 4th input lane is unused, hence undef).
 */
static LLVMValueRef build_cube_intrinsic(struct gallivm_state *gallivm,
					 LLVMValueRef in[3])
{
	if (HAVE_LLVM >= 0x0309) {
		LLVMTypeRef f32 = LLVMTypeOf(in[0]);
		LLVMValueRef out[4];

		out[0] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubetc",
					    f32, in, 3, LLVMReadNoneAttribute);
		out[1] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubesc",
					    f32, in, 3, LLVMReadNoneAttribute);
		out[2] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubema",
					    f32, in, 3, LLVMReadNoneAttribute);
		out[3] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubeid",
					    f32, in, 3, LLVMReadNoneAttribute);

		return lp_build_gather_values(gallivm, out, 4);
	} else {
		LLVMValueRef c[4] = {
			in[0],
			in[1],
			in[2],
			LLVMGetUndef(LLVMTypeOf(in[0]))	/* lane 3 unused */
		};
		LLVMValueRef vec = lp_build_gather_values(gallivm, c, 4);

		return lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.cube",
					  LLVMTypeOf(vec), &vec, 1,
					  LLVMReadNoneAttribute);
	}
}
1081
/* Convert a 3-component cube-map direction (in[0..2]) into the 2D face
 * coordinates + face index expected by the image instructions:
 * out = { s, t, face_id }, where s/t = coord * (1/|ma|) + 1.5.
 */
static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
					  LLVMValueRef *in, LLVMValueRef *out)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMTypeRef type = bld_base->base.elem_type;
	LLVMValueRef coords[4];
	LLVMValueRef mad_args[3];
	LLVMValueRef v;
	unsigned i;

	/* v = <tc, sc, ma, face_id> from the hardware cube intrinsic. */
	v = build_cube_intrinsic(gallivm, in);

	for (i = 0; i < 4; ++i)
		coords[i] = LLVMBuildExtractElement(builder, v,
						    lp_build_const_int32(gallivm, i), "");

	/* coords[2] = 1 / |ma|  (reciprocal of the major-axis magnitude) */
	coords[2] = lp_build_intrinsic(builder, "llvm.fabs.f32",
				       type, &coords[2], 1, LLVMReadNoneAttribute);
	coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);

	/* s/t = coord * (1/|ma|) + 1.5 — the +1.5 bias maps [-0.5, 0.5]
	 * into the face's texel space. */
	mad_args[1] = coords[2];
	mad_args[2] = LLVMConstReal(type, 1.5);

	mad_args[0] = coords[0];
	coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
					       mad_args[0], mad_args[1], mad_args[2]);

	mad_args[0] = coords[1];
	coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
					       mad_args[0], mad_args[1], mad_args[2]);

	/* apply xyz = yxw swizzle to coords */
	out[0] = coords[1];
	out[1] = coords[0];
	out[2] = coords[3];
}
1119
/* Rewrite cube-map texture coordinates (and, for TXD, derivatives) in place
 * for the 2D-face addressing the hardware expects.
 *
 * coords_arg in:  cube direction (xyz), plus array index / compare / lod /
 *                 bias depending on opcode and target.
 * coords_arg out: { s, t, face_or_layer, compare/lod/bias }.
 * derivs_arg:     for TXD, 2 x 3 cube-space derivatives in, 2 x 2 face-space
 *                 derivatives out (approximated by finite differences; see
 *                 the caveat below about face crossings).
 */
void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
					  struct lp_build_emit_data *emit_data,
					  LLVMValueRef *coords_arg,
					  LLVMValueRef *derivs_arg)
{

	unsigned target = emit_data->inst->Texture.Texture;
	unsigned opcode = emit_data->inst->Instruction.Opcode;
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef coords[4];
	unsigned i;

	radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);

	if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
		LLVMValueRef derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef shifted_cube_coords[4], shifted_coords[4];

			/* Shift the cube coordinates by the derivatives to get
			 * the cube coordinates of the "neighboring pixel".
			 */
			for (i = 0; i < 3; i++)
				shifted_cube_coords[i] =
					LLVMBuildFAdd(builder, coords_arg[i],
						      derivs_arg[axis*3+i], "");
			shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);

			/* Project the shifted cube coordinates onto the face. */
			radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
						      shifted_coords);

			/* Subtract both sets of 2D coordinates to get 2D derivatives.
			 * This won't work if the shifted coordinates ended up
			 * in a different face.
			 */
			for (i = 0; i < 2; i++)
				derivs[axis * 2 + i] =
					LLVMBuildFSub(builder, shifted_coords[i],
						      coords[i], "");
		}

		memcpy(derivs_arg, derivs, sizeof(derivs));
	}

	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
						       coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
	}

	/* Preserve compare/lod/bias. Put it in coords.w. */
	if (opcode == TGSI_OPCODE_TEX2 ||
	    opcode == TGSI_OPCODE_TXB2 ||
	    opcode == TGSI_OPCODE_TXL2) {
		/* Two-source forms carry the extra value in src1.x == arg 4. */
		coords[3] = coords_arg[4];
	} else if (opcode == TGSI_OPCODE_TXB ||
		   opcode == TGSI_OPCODE_TXL ||
		   target == TGSI_TEXTURE_SHADOWCUBE) {
		coords[3] = coords_arg[3];
	}

	memcpy(coords_arg, coords, sizeof(coords));
}
1190
1191 static void emit_icmp(const struct lp_build_tgsi_action *action,
1192 struct lp_build_tgsi_context *bld_base,
1193 struct lp_build_emit_data *emit_data)
1194 {
1195 unsigned pred;
1196 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1197 LLVMContextRef context = bld_base->base.gallivm->context;
1198
1199 switch (emit_data->inst->Instruction.Opcode) {
1200 case TGSI_OPCODE_USEQ:
1201 case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
1202 case TGSI_OPCODE_USNE:
1203 case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
1204 case TGSI_OPCODE_USGE:
1205 case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
1206 case TGSI_OPCODE_USLT:
1207 case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
1208 case TGSI_OPCODE_ISGE:
1209 case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
1210 case TGSI_OPCODE_ISLT:
1211 case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
1212 default:
1213 assert(!"unknown instruction");
1214 pred = 0;
1215 break;
1216 }
1217
1218 LLVMValueRef v = LLVMBuildICmp(builder, pred,
1219 emit_data->args[0], emit_data->args[1],"");
1220
1221 v = LLVMBuildSExtOrBitCast(builder, v,
1222 LLVMInt32TypeInContext(context), "");
1223
1224 emit_data->output[emit_data->chan] = v;
1225 }
1226
1227 static void emit_ucmp(const struct lp_build_tgsi_action *action,
1228 struct lp_build_tgsi_context *bld_base,
1229 struct lp_build_emit_data *emit_data)
1230 {
1231 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1232
1233 LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
1234 bld_base->uint_bld.elem_type, "");
1235
1236 LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
1237 bld_base->uint_bld.zero, "");
1238
1239 emit_data->output[emit_data->chan] =
1240 LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
1241 }
1242
1243 static void emit_cmp(const struct lp_build_tgsi_action *action,
1244 struct lp_build_tgsi_context *bld_base,
1245 struct lp_build_emit_data *emit_data)
1246 {
1247 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1248 LLVMValueRef cond, *args = emit_data->args;
1249
1250 cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
1251 bld_base->base.zero, "");
1252
1253 emit_data->output[emit_data->chan] =
1254 LLVMBuildSelect(builder, cond, args[1], args[2], "");
1255 }
1256
1257 static void emit_set_cond(const struct lp_build_tgsi_action *action,
1258 struct lp_build_tgsi_context *bld_base,
1259 struct lp_build_emit_data *emit_data)
1260 {
1261 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1262 LLVMRealPredicate pred;
1263 LLVMValueRef cond;
1264
1265 /* Use ordered for everything but NE (which is usual for
1266 * float comparisons)
1267 */
1268 switch (emit_data->inst->Instruction.Opcode) {
1269 case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
1270 case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
1271 case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
1272 case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
1273 case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
1274 case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
1275 default: assert(!"unknown instruction"); pred = 0; break;
1276 }
1277
1278 cond = LLVMBuildFCmp(builder,
1279 pred, emit_data->args[0], emit_data->args[1], "");
1280
1281 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
1282 cond, bld_base->base.one, bld_base->base.zero, "");
1283 }
1284
1285 static void emit_fcmp(const struct lp_build_tgsi_action *action,
1286 struct lp_build_tgsi_context *bld_base,
1287 struct lp_build_emit_data *emit_data)
1288 {
1289 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1290 LLVMContextRef context = bld_base->base.gallivm->context;
1291 LLVMRealPredicate pred;
1292
1293 /* Use ordered for everything but NE (which is usual for
1294 * float comparisons)
1295 */
1296 switch (emit_data->inst->Instruction.Opcode) {
1297 case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
1298 case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
1299 case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
1300 case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
1301 default: assert(!"unknown instruction"); pred = 0; break;
1302 }
1303
1304 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1305 emit_data->args[0], emit_data->args[1],"");
1306
1307 v = LLVMBuildSExtOrBitCast(builder, v,
1308 LLVMInt32TypeInContext(context), "");
1309
1310 emit_data->output[emit_data->chan] = v;
1311 }
1312
1313 static void emit_dcmp(const struct lp_build_tgsi_action *action,
1314 struct lp_build_tgsi_context *bld_base,
1315 struct lp_build_emit_data *emit_data)
1316 {
1317 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1318 LLVMContextRef context = bld_base->base.gallivm->context;
1319 LLVMRealPredicate pred;
1320
1321 /* Use ordered for everything but NE (which is usual for
1322 * float comparisons)
1323 */
1324 switch (emit_data->inst->Instruction.Opcode) {
1325 case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
1326 case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
1327 case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
1328 case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
1329 default: assert(!"unknown instruction"); pred = 0; break;
1330 }
1331
1332 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1333 emit_data->args[0], emit_data->args[1],"");
1334
1335 v = LLVMBuildSExtOrBitCast(builder, v,
1336 LLVMInt32TypeInContext(context), "");
1337
1338 emit_data->output[emit_data->chan] = v;
1339 }
1340
1341 static void emit_not(const struct lp_build_tgsi_action *action,
1342 struct lp_build_tgsi_context *bld_base,
1343 struct lp_build_emit_data *emit_data)
1344 {
1345 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1346 LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
1347 emit_data->args[0]);
1348 emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
1349 }
1350
1351 static void emit_arl(const struct lp_build_tgsi_action *action,
1352 struct lp_build_tgsi_context *bld_base,
1353 struct lp_build_emit_data *emit_data)
1354 {
1355 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1356 LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
1357 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1358 floor_index, bld_base->base.int_elem_type , "");
1359 }
1360
1361 static void emit_and(const struct lp_build_tgsi_action *action,
1362 struct lp_build_tgsi_context *bld_base,
1363 struct lp_build_emit_data *emit_data)
1364 {
1365 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1366 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1367 emit_data->args[0], emit_data->args[1], "");
1368 }
1369
1370 static void emit_or(const struct lp_build_tgsi_action *action,
1371 struct lp_build_tgsi_context *bld_base,
1372 struct lp_build_emit_data *emit_data)
1373 {
1374 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1375 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1376 emit_data->args[0], emit_data->args[1], "");
1377 }
1378
1379 static void emit_uadd(const struct lp_build_tgsi_action *action,
1380 struct lp_build_tgsi_context *bld_base,
1381 struct lp_build_emit_data *emit_data)
1382 {
1383 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1384 emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
1385 emit_data->args[0], emit_data->args[1], "");
1386 }
1387
1388 static void emit_udiv(const struct lp_build_tgsi_action *action,
1389 struct lp_build_tgsi_context *bld_base,
1390 struct lp_build_emit_data *emit_data)
1391 {
1392 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1393 emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
1394 emit_data->args[0], emit_data->args[1], "");
1395 }
1396
1397 static void emit_idiv(const struct lp_build_tgsi_action *action,
1398 struct lp_build_tgsi_context *bld_base,
1399 struct lp_build_emit_data *emit_data)
1400 {
1401 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1402 emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
1403 emit_data->args[0], emit_data->args[1], "");
1404 }
1405
1406 static void emit_mod(const struct lp_build_tgsi_action *action,
1407 struct lp_build_tgsi_context *bld_base,
1408 struct lp_build_emit_data *emit_data)
1409 {
1410 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1411 emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
1412 emit_data->args[0], emit_data->args[1], "");
1413 }
1414
1415 static void emit_umod(const struct lp_build_tgsi_action *action,
1416 struct lp_build_tgsi_context *bld_base,
1417 struct lp_build_emit_data *emit_data)
1418 {
1419 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1420 emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
1421 emit_data->args[0], emit_data->args[1], "");
1422 }
1423
1424 static void emit_shl(const struct lp_build_tgsi_action *action,
1425 struct lp_build_tgsi_context *bld_base,
1426 struct lp_build_emit_data *emit_data)
1427 {
1428 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1429 emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
1430 emit_data->args[0], emit_data->args[1], "");
1431 }
1432
1433 static void emit_ushr(const struct lp_build_tgsi_action *action,
1434 struct lp_build_tgsi_context *bld_base,
1435 struct lp_build_emit_data *emit_data)
1436 {
1437 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1438 emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
1439 emit_data->args[0], emit_data->args[1], "");
1440 }
1441 static void emit_ishr(const struct lp_build_tgsi_action *action,
1442 struct lp_build_tgsi_context *bld_base,
1443 struct lp_build_emit_data *emit_data)
1444 {
1445 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1446 emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
1447 emit_data->args[0], emit_data->args[1], "");
1448 }
1449
1450 static void emit_xor(const struct lp_build_tgsi_action *action,
1451 struct lp_build_tgsi_context *bld_base,
1452 struct lp_build_emit_data *emit_data)
1453 {
1454 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1455 emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
1456 emit_data->args[0], emit_data->args[1], "");
1457 }
1458
/* Sign opcodes (SSG / ISSG / I64SSG): output -1, 0 or +1 according to the
 * sign of the operand, in the operand's own type.
 *
 * Each branch uses the same two-step select:
 *   val = (x > 0) ? 1 : x;        -- positives collapse to +1
 *   val = (val >= 0) ? val : -1;  -- negatives collapse to -1, 0 stays 0
 */
static void emit_ssg(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;

	LLVMValueRef cmp, val;

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
		cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int64_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, emit_data->args[0], "");
		cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int64_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), "");
	} else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
		cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
		cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
	} else { /* float SSG; ordered compares, so NaN falls through to -1 */
		cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
		val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
		cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
		val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
	}

	emit_data->output[emit_data->chan] = val;
}
1486
1487 static void emit_ineg(const struct lp_build_tgsi_action *action,
1488 struct lp_build_tgsi_context *bld_base,
1489 struct lp_build_emit_data *emit_data)
1490 {
1491 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1492 emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
1493 emit_data->args[0], "");
1494 }
1495
1496 static void emit_dneg(const struct lp_build_tgsi_action *action,
1497 struct lp_build_tgsi_context *bld_base,
1498 struct lp_build_emit_data *emit_data)
1499 {
1500 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1501 emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
1502 emit_data->args[0], "");
1503 }
1504
1505 static void emit_frac(const struct lp_build_tgsi_action *action,
1506 struct lp_build_tgsi_context *bld_base,
1507 struct lp_build_emit_data *emit_data)
1508 {
1509 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1510 char *intr;
1511
1512 if (emit_data->info->opcode == TGSI_OPCODE_FRC)
1513 intr = "llvm.floor.f32";
1514 else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
1515 intr = "llvm.floor.f64";
1516 else {
1517 assert(0);
1518 return;
1519 }
1520
1521 LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
1522 &emit_data->args[0], 1,
1523 LLVMReadNoneAttribute);
1524 emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
1525 emit_data->args[0], floor, "");
1526 }
1527
1528 static void emit_f2i(const struct lp_build_tgsi_action *action,
1529 struct lp_build_tgsi_context *bld_base,
1530 struct lp_build_emit_data *emit_data)
1531 {
1532 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1533 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1534 emit_data->args[0], bld_base->int_bld.elem_type, "");
1535 }
1536
1537 static void emit_f2u(const struct lp_build_tgsi_action *action,
1538 struct lp_build_tgsi_context *bld_base,
1539 struct lp_build_emit_data *emit_data)
1540 {
1541 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1542 emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
1543 emit_data->args[0], bld_base->uint_bld.elem_type, "");
1544 }
1545
1546 static void emit_i2f(const struct lp_build_tgsi_action *action,
1547 struct lp_build_tgsi_context *bld_base,
1548 struct lp_build_emit_data *emit_data)
1549 {
1550 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1551 emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
1552 emit_data->args[0], bld_base->base.elem_type, "");
1553 }
1554
1555 static void emit_u2f(const struct lp_build_tgsi_action *action,
1556 struct lp_build_tgsi_context *bld_base,
1557 struct lp_build_emit_data *emit_data)
1558 {
1559 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1560 emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
1561 emit_data->args[0], bld_base->base.elem_type, "");
1562 }
1563
/* Record one TGSI immediate: store each of its four channels as a 32-bit
 * integer constant holding the raw bit pattern (imm->u[i].Uint) in the SOA
 * context's immediate table, then bump the immediate count. */
static void emit_immediate(struct lp_build_tgsi_context *bld_base,
			   const struct tgsi_full_immediate *imm)
{
	unsigned i;
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);

	for (i = 0; i < 4; ++i) {
		ctx->soa.immediates[ctx->soa.num_immediates][i] =
			LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false );
	}

	ctx->soa.num_immediates++;
}
1577
1578 void
1579 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
1580 struct lp_build_tgsi_context *bld_base,
1581 struct lp_build_emit_data *emit_data)
1582 {
1583 struct lp_build_context *base = &bld_base->base;
1584 emit_data->output[emit_data->chan] =
1585 lp_build_intrinsic(base->gallivm->builder, action->intr_name,
1586 emit_data->dst_type, emit_data->args,
1587 emit_data->arg_count, LLVMReadNoneAttribute);
1588 }
1589
/* TGSI BFI (bitfield insert): insert the low src3 bits of src1 into src0
 * at bit offset src2, i.e. args = { base, insert, offset, bits }. */
static void emit_bfi(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef bfi_args[3];

	/* Calculate the bitmask: (((1 << src3) - 1) << src2) */
	bfi_args[0] = LLVMBuildShl(builder,
				   LLVMBuildSub(builder,
						LLVMBuildShl(builder,
							     bld_base->int_bld.one,
							     emit_data->args[3], ""),
						bld_base->int_bld.one, ""),
				   emit_data->args[2], "");

	/* Shift the insert value into position. */
	bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
				   emit_data->args[2], "");

	bfi_args[2] = emit_data->args[0];

	/* Calculate:
	 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
	 */
	emit_data->output[emit_data->chan] =
		LLVMBuildXor(builder, bfi_args[2],
			     LLVMBuildAnd(builder, bfi_args[0],
					  LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
						       ""), ""), "");
}
1622
/* LSB — find the first (least significant) set bit; this is ffs in C.
 * Lowered to llvm.cttz.i32 with is_zero_undef = 1. */
static void emit_lsb(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMValueRef args[2] = {
		emit_data->args[0],

		/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
		 * add special code to check for x=0. The reason is that
		 * the LLVM behavior for x=0 is different from what we
		 * need here.
		 *
		 * The hardware already implements the correct behavior.
		 */
		lp_build_const_int32(gallivm, 1)
	};

	emit_data->output[emit_data->chan] =
		lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
				   emit_data->dst_type, args, ARRAY_SIZE(args),
				   LLVMReadNoneAttribute);
}
1647
/* UMSB — find the last (most significant) set bit of an unsigned value.
 * Uses llvm.ctlz.i32 and converts the MSB-relative count to an LSB-relative
 * index; returns -1 when the input is zero (TGSI semantics). */
static void emit_umsb(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef args[2] = {
		emit_data->args[0],
		/* Don't generate code for handling zero (is_zero_undef=1);
		 * the zero case is handled by the select below. */
		lp_build_const_int32(gallivm, 1)
	};

	LLVMValueRef msb =
		lp_build_intrinsic(builder, "llvm.ctlz.i32",
				   emit_data->dst_type, args, ARRAY_SIZE(args),
				   LLVMReadNoneAttribute);

	/* The HW returns the last bit index from MSB, but TGSI wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
			   msb, "");

	/* Check for zero: */
	emit_data->output[emit_data->chan] =
		LLVMBuildSelect(builder,
				LLVMBuildICmp(builder, LLVMIntEQ, args[0],
					      bld_base->uint_bld.zero, ""),
				lp_build_const_int32(gallivm, -1), msb, "");
}
1678
/* IMSB — find the last (most significant) bit that differs from the sign
 * bit, via the AMDGPU flbit intrinsic. Converts the MSB-relative count to
 * an LSB-relative index; returns -1 for 0 and -1 (no such bit). */
static void emit_imsb(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef arg = emit_data->args[0];

	LLVMValueRef msb =
		lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
				   emit_data->dst_type, &arg, 1,
				   LLVMReadNoneAttribute);

	/* The HW returns the last bit index from MSB, but TGSI wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
			   msb, "");

	/* If arg == 0 || arg == -1 (0xffffffff), return -1. */
	LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1);

	LLVMValueRef cond =
		LLVMBuildOr(builder,
			    LLVMBuildICmp(builder, LLVMIntEQ, arg,
					  bld_base->uint_bld.zero, ""),
			    LLVMBuildICmp(builder, LLVMIntEQ, arg,
					  all_ones, ""), "");

	emit_data->output[emit_data->chan] =
		LLVMBuildSelect(builder, cond, all_ones, msb, "");
}
1711
1712 static void emit_iabs(const struct lp_build_tgsi_action *action,
1713 struct lp_build_tgsi_context *bld_base,
1714 struct lp_build_emit_data *emit_data)
1715 {
1716 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1717
1718 emit_data->output[emit_data->chan] =
1719 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
1720 emit_data->args[0],
1721 LLVMBuildNeg(builder,
1722 emit_data->args[0], ""));
1723 }
1724
/* Integer min/max opcodes (32- and 64-bit, signed and unsigned): pick the
 * compare predicate from the opcode and select the winning operand. */
static void emit_minmax_int(const struct lp_build_tgsi_action *action,
			    struct lp_build_tgsi_context *bld_base,
			    struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMIntPredicate op;

	switch (emit_data->info->opcode) {
	default:
		assert(0);
		/* fallthrough — pick IMAX behavior on release builds */
	case TGSI_OPCODE_IMAX:
	case TGSI_OPCODE_I64MAX:
		op = LLVMIntSGT;
		break;
	case TGSI_OPCODE_IMIN:
	case TGSI_OPCODE_I64MIN:
		op = LLVMIntSLT;
		break;
	case TGSI_OPCODE_UMAX:
	case TGSI_OPCODE_U64MAX:
		op = LLVMIntUGT;
		break;
	case TGSI_OPCODE_UMIN:
	case TGSI_OPCODE_U64MIN:
		op = LLVMIntULT;
		break;
	}

	emit_data->output[emit_data->chan] =
		LLVMBuildSelect(builder,
				LLVMBuildICmp(builder, op, emit_data->args[0],
					      emit_data->args[1], ""),
				emit_data->args[0],
				emit_data->args[1], "");
}
1760
1761 static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base,
1762 struct lp_build_emit_data *emit_data)
1763 {
1764 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1765 0, TGSI_CHAN_X);
1766 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
1767 0, TGSI_CHAN_Y);
1768 }
1769
/* TGSI PK2H: pack two f32 values into one u32 as a pair of f16 halves —
 * arg0 in bits [15:0], arg1 in bits [31:16]. */
static void emit_pk2h(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMContextRef context = bld_base->base.gallivm->context;
	struct lp_build_context *uint_bld = &bld_base->uint_bld;
	LLVMTypeRef fp16, i16;
	LLVMValueRef const16, comp[2];
	unsigned i;

	fp16 = LLVMHalfTypeInContext(context);
	i16 = LLVMInt16TypeInContext(context);
	const16 = lp_build_const_int32(uint_bld->gallivm, 16);

	/* f32 -> f16, reinterpret as i16, widen to i32. */
	for (i = 0; i < 2; i++) {
		comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
		comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
		comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
	}

	/* Merge: second half into the high 16 bits. */
	comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
	comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");

	emit_data->output[emit_data->chan] = comp[0];
}
1796
1797 static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base,
1798 struct lp_build_emit_data *emit_data)
1799 {
1800 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1801 0, TGSI_CHAN_X);
1802 }
1803
/* TGSI UP2H: unpack a u32 holding two f16 halves into two f32 outputs —
 * output[0] from bits [15:0], output[1] from bits [31:16]. */
static void emit_up2h(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMContextRef context = bld_base->base.gallivm->context;
	struct lp_build_context *uint_bld = &bld_base->uint_bld;
	LLVMTypeRef fp16, i16;
	LLVMValueRef const16, input, val;
	unsigned i;

	fp16 = LLVMHalfTypeInContext(context);
	i16 = LLVMInt16TypeInContext(context);
	const16 = lp_build_const_int32(uint_bld->gallivm, 16);
	input = emit_data->args[0];

	for (i = 0; i < 2; i++) {
		/* Second iteration takes the high half. */
		val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
		val = LLVMBuildTrunc(builder, val, i16, "");
		val = LLVMBuildBitCast(builder, val, fp16, "");
		emit_data->output[i] =
			LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
	}
}
1828
/* TGSI DIV: plain fdiv, but tagged with the context's !fpmath 2.5ulp
 * metadata on LLVM >= 3.9 so the backend may lower it to v_rcp_f32 instead
 * of a precise divide. Constant-folded results are skipped because
 * LLVMSetMetadata requires an instruction, not a constant. */
static void emit_fdiv(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);

	emit_data->output[emit_data->chan] =
		LLVMBuildFDiv(bld_base->base.gallivm->builder,
			      emit_data->args[0], emit_data->args[1], "");

	/* Use v_rcp_f32 instead of precise division. */
	if (HAVE_LLVM >= 0x0309 &&
	    !LLVMIsConstant(emit_data->output[emit_data->chan]))
		LLVMSetMetadata(emit_data->output[emit_data->chan],
				ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
}
1845
1846 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
1847 * the target machine. f64 needs global unsafe math flags to get rsq. */
1848 static void emit_rsq(const struct lp_build_tgsi_action *action,
1849 struct lp_build_tgsi_context *bld_base,
1850 struct lp_build_emit_data *emit_data)
1851 {
1852 LLVMValueRef sqrt =
1853 lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
1854 emit_data->args[0]);
1855
1856 emit_data->output[emit_data->chan] =
1857 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
1858 bld_base->base.one, sqrt);
1859 }
1860
/* Set up a radeon_llvm_context for translating one TGSI shader:
 * creates the LLVM context/module/builder, initializes the scalar
 * lp_build contexts for every value width, and fills in the per-opcode
 * action table used by the TGSI-to-LLVM translator.
 *
 * triple: LLVM target triple stored on the new module.
 * info:   pre-computed tgsi_shader_info; may be NULL.
 * tokens: TGSI token stream, only used to scan indirect temp arrays;
 *         may be NULL.
 */
void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple,
			      const struct tgsi_shader_info *info,
			      const struct tgsi_token *tokens)
{
	struct lp_type type;

	/* Initialize the gallivm object:
	 * We are only using the module, context, and builder fields of this struct.
	 * This should be enough for us to be able to pass our gallivm struct to the
	 * helper functions in the gallivm module.
	 */
	memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
	memset(&ctx->soa, 0, sizeof(ctx->soa));
	ctx->gallivm.context = LLVMContextCreate();
	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
						ctx->gallivm.context);
	LLVMSetTarget(ctx->gallivm.module, triple);
	ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);

	struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;

	bld_base->info = info;

	/* Allocate per-array bookkeeping for indirectly addressed temporaries
	 * and scan the token stream to find the array declarations. */
	if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) {
		int size = info->array_max[TGSI_FILE_TEMPORARY];

		ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
		ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));

		if (tokens)
			tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
					 ctx->temp_arrays);
	}

	/* Base type: scalar (length 1) 32-bit float; the int/uint and
	 * 64-bit build contexts below are derived from it. */
	type.floating = true;
	type.fixed = false;
	type.sign = true;
	type.norm = false;
	type.width = 32;
	type.length = 1;

	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
	lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
	/* 64-bit float build context. */
	{
		struct lp_type dbl_type;
		dbl_type = type;
		dbl_type.width *= 2;
		lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
	}
	/* 64-bit unsigned integer build context. */
	{
		struct lp_type dtype;
		dtype = lp_uint_type(type);
		dtype.width *= 2;
		lp_build_context_init(&ctx->soa.bld_base.uint64_bld, &ctx->gallivm, dtype);
	}
	/* 64-bit signed integer build context. */
	{
		struct lp_type dtype;
		dtype = lp_int_type(type);
		dtype.width *= 2;
		lp_build_context_init(&ctx->soa.bld_base.int64_bld, &ctx->gallivm, dtype);
	}

	/* Hook up our fetch/store/declaration callbacks. */
	bld_base->soa = 1;
	bld_base->emit_store = radeon_llvm_emit_store;
	bld_base->emit_swizzle = emit_swizzle;
	bld_base->emit_declaration = emit_declaration;
	bld_base->emit_immediate = emit_immediate;

	bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;

	/* metadata allowing 2.5 ULP */
	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
						       "fpmath", 6);
	LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
						     &arg, 1);

	/* Allocate outputs */
	ctx->soa.outputs = ctx->outputs;

	/* Start from gallivm's defaults, then override per opcode.
	 * Several intrinsic names are selected at compile time based on
	 * the LLVM version (HAVE_LLVM). */
	lp_set_default_actions(bld_base);

	bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.fabs.f32";
	bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
	bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
	bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_BREV].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.bitreverse.i32" : "llvm.AMDGPU.brev";
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
	bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." : "llvm.AMDIL.clamp.";
	bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
	bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
	bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
	bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
	bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
	bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
	bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name =
		HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.rsq.f64" : "llvm.AMDGPU.rsq.f64";
	bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
	bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_EX2].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.exp2.f32" : "llvm.AMDIL.exp.";
	bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
	/* FMA reuses whatever gallivm installed for MAD. */
	bld_base->op_actions[TGSI_OPCODE_FMA].emit =
		bld_base->op_actions[TGSI_OPCODE_MAD].emit;
	bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
	bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
	bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
	bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
	bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
	bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
	bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
	bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
	bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
	bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
	bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
	bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
	bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
	bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
	bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
	bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
	bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
	bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
	bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
	bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
	bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
	bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
	bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
	bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
	bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
	bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
	bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
	bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
	bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
	bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
	bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
	bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
	bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;

	/* 64-bit integer opcodes; many handlers are shared with the
	 * 32-bit variants and dispatch on the operand type. */
	bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
	bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;

	bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;

	bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
	bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
	bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
	bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;

	bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
	bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
	bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
	bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
}
2093
2094 void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
2095 LLVMTypeRef *return_types, unsigned num_return_elems,
2096 LLVMTypeRef *ParamTypes, unsigned ParamCount)
2097 {
2098 LLVMTypeRef main_fn_type, ret_type;
2099 LLVMBasicBlockRef main_fn_body;
2100
2101 if (num_return_elems)
2102 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
2103 return_types,
2104 num_return_elems, true);
2105 else
2106 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
2107
2108 /* Setup the function */
2109 ctx->return_type = ret_type;
2110 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
2111 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
2112 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
2113 ctx->main_fn, "main_body");
2114 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
2115 }
2116
/* Run a fixed set of function-level optimization passes over the shader's
 * main function, then release the builder and pass manager. The module
 * itself stays alive until radeon_llvm_dispose(). */
void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx)
{
	struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
	const char *triple = LLVMGetTarget(gallivm->module);
	LLVMTargetLibraryInfoRef target_library_info;

	/* Create the pass manager */
	gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
							gallivm->module);

	/* Library info must match the module's target triple so libcall
	 * optimizations are legal for the target. */
	target_library_info = gallivm_create_target_library_info(triple);
	LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);

	/* This pass should eliminate all the load and store instructions */
	LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);

	/* Add some optimization passes */
	LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
	LLVMAddLICMPass(gallivm->passmgr);
	LLVMAddAggressiveDCEPass(gallivm->passmgr);
	LLVMAddCFGSimplificationPass(gallivm->passmgr);
	LLVMAddInstructionCombiningPass(gallivm->passmgr);

	/* Run the pass */
	LLVMInitializeFunctionPassManager(gallivm->passmgr);
	LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
	LLVMFinalizeFunctionPassManager(gallivm->passmgr);

	/* The IR is final now; the builder and pass manager are no
	 * longer needed. */
	LLVMDisposeBuilder(gallivm->builder);
	LLVMDisposePassManager(gallivm->passmgr);
	gallivm_dispose_target_library_info(target_library_info);
}
2149
2150 void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
2151 {
2152 LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
2153 LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
2154 FREE(ctx->temp_arrays);
2155 ctx->temp_arrays = NULL;
2156 FREE(ctx->temp_array_allocas);
2157 ctx->temp_array_allocas = NULL;
2158 FREE(ctx->temps);
2159 ctx->temps = NULL;
2160 ctx->temps_count = 0;
2161 FREE(ctx->flow);
2162 ctx->flow = NULL;
2163 ctx->flow_depth_max = 0;
2164 }