radeonsi: rewrite late alloc VS limit computation
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_tgsi_alu.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_shader_internal.h"
25 #include "gallivm/lp_bld_const.h"
26 #include "gallivm/lp_bld_intr.h"
27 #include "gallivm/lp_bld_gather.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "amd/common/ac_llvm_build.h"
30
31 static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
32 struct lp_build_emit_data *emit_data)
33 {
34 const struct tgsi_full_instruction *inst = emit_data->inst;
35 struct gallivm_state *gallivm = bld_base->base.gallivm;
36 LLVMBuilderRef builder = gallivm->builder;
37 unsigned i;
38 LLVMValueRef conds[TGSI_NUM_CHANNELS];
39
40 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
41 LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
42 conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
43 bld_base->base.zero, "");
44 }
45
46 /* Or the conditions together */
47 for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
48 conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
49 }
50
51 emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
52 emit_data->arg_count = 1;
53 emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
54 lp_build_const_float(gallivm, -1.0f),
55 bld_base->base.zero, "");
56 }
57
58 static void kil_emit(const struct lp_build_tgsi_action *action,
59 struct lp_build_tgsi_context *bld_base,
60 struct lp_build_emit_data *emit_data)
61 {
62 struct si_shader_context *ctx = si_shader_context(bld_base);
63 LLVMBuilderRef builder = ctx->gallivm.builder;
64
65 if (ctx->postponed_kill) {
66 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
67 LLVMValueRef val;
68
69 /* Take the minimum kill value. This is the same as OR
70 * between 2 kill values. If the value is negative,
71 * the pixel will be killed.
72 */
73 val = LLVMBuildLoad(builder, ctx->postponed_kill, "");
74 val = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN,
75 val, emit_data->args[0]);
76 LLVMBuildStore(builder, val, ctx->postponed_kill);
77 } else {
78 LLVMBuildStore(builder,
79 LLVMConstReal(ctx->f32, -1),
80 ctx->postponed_kill);
81 }
82 return;
83 }
84
85 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF)
86 ac_build_kill(&ctx->ac, emit_data->args[0]);
87 else
88 ac_build_kill(&ctx->ac, NULL);
89 }
90
91 static void emit_icmp(const struct lp_build_tgsi_action *action,
92 struct lp_build_tgsi_context *bld_base,
93 struct lp_build_emit_data *emit_data)
94 {
95 unsigned pred;
96 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
97 LLVMContextRef context = bld_base->base.gallivm->context;
98
99 switch (emit_data->inst->Instruction.Opcode) {
100 case TGSI_OPCODE_USEQ:
101 case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
102 case TGSI_OPCODE_USNE:
103 case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
104 case TGSI_OPCODE_USGE:
105 case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
106 case TGSI_OPCODE_USLT:
107 case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
108 case TGSI_OPCODE_ISGE:
109 case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
110 case TGSI_OPCODE_ISLT:
111 case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
112 default:
113 assert(!"unknown instruction");
114 pred = 0;
115 break;
116 }
117
118 LLVMValueRef v = LLVMBuildICmp(builder, pred,
119 emit_data->args[0], emit_data->args[1],"");
120
121 v = LLVMBuildSExtOrBitCast(builder, v,
122 LLVMInt32TypeInContext(context), "");
123
124 emit_data->output[emit_data->chan] = v;
125 }
126
127 static void emit_ucmp(const struct lp_build_tgsi_action *action,
128 struct lp_build_tgsi_context *bld_base,
129 struct lp_build_emit_data *emit_data)
130 {
131 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
132
133 LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
134 bld_base->uint_bld.elem_type, "");
135
136 LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
137 bld_base->uint_bld.zero, "");
138
139 emit_data->output[emit_data->chan] =
140 LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
141 }
142
143 static void emit_cmp(const struct lp_build_tgsi_action *action,
144 struct lp_build_tgsi_context *bld_base,
145 struct lp_build_emit_data *emit_data)
146 {
147 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
148 LLVMValueRef cond, *args = emit_data->args;
149
150 cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
151 bld_base->base.zero, "");
152
153 emit_data->output[emit_data->chan] =
154 LLVMBuildSelect(builder, cond, args[1], args[2], "");
155 }
156
157 static void emit_set_cond(const struct lp_build_tgsi_action *action,
158 struct lp_build_tgsi_context *bld_base,
159 struct lp_build_emit_data *emit_data)
160 {
161 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
162 LLVMRealPredicate pred;
163 LLVMValueRef cond;
164
165 /* Use ordered for everything but NE (which is usual for
166 * float comparisons)
167 */
168 switch (emit_data->inst->Instruction.Opcode) {
169 case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
170 case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
171 case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
172 case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
173 case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
174 case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
175 default: assert(!"unknown instruction"); pred = 0; break;
176 }
177
178 cond = LLVMBuildFCmp(builder,
179 pred, emit_data->args[0], emit_data->args[1], "");
180
181 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
182 cond, bld_base->base.one, bld_base->base.zero, "");
183 }
184
185 static void emit_fcmp(const struct lp_build_tgsi_action *action,
186 struct lp_build_tgsi_context *bld_base,
187 struct lp_build_emit_data *emit_data)
188 {
189 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
190 LLVMContextRef context = bld_base->base.gallivm->context;
191 LLVMRealPredicate pred;
192
193 /* Use ordered for everything but NE (which is usual for
194 * float comparisons)
195 */
196 switch (emit_data->inst->Instruction.Opcode) {
197 case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
198 case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
199 case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
200 case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
201 default: assert(!"unknown instruction"); pred = 0; break;
202 }
203
204 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
205 emit_data->args[0], emit_data->args[1],"");
206
207 v = LLVMBuildSExtOrBitCast(builder, v,
208 LLVMInt32TypeInContext(context), "");
209
210 emit_data->output[emit_data->chan] = v;
211 }
212
213 static void emit_dcmp(const struct lp_build_tgsi_action *action,
214 struct lp_build_tgsi_context *bld_base,
215 struct lp_build_emit_data *emit_data)
216 {
217 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
218 LLVMContextRef context = bld_base->base.gallivm->context;
219 LLVMRealPredicate pred;
220
221 /* Use ordered for everything but NE (which is usual for
222 * float comparisons)
223 */
224 switch (emit_data->inst->Instruction.Opcode) {
225 case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
226 case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
227 case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
228 case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
229 default: assert(!"unknown instruction"); pred = 0; break;
230 }
231
232 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
233 emit_data->args[0], emit_data->args[1],"");
234
235 v = LLVMBuildSExtOrBitCast(builder, v,
236 LLVMInt32TypeInContext(context), "");
237
238 emit_data->output[emit_data->chan] = v;
239 }
240
241 static void emit_not(const struct lp_build_tgsi_action *action,
242 struct lp_build_tgsi_context *bld_base,
243 struct lp_build_emit_data *emit_data)
244 {
245 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
246 LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
247 emit_data->args[0]);
248 emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
249 }
250
251 static void emit_arl(const struct lp_build_tgsi_action *action,
252 struct lp_build_tgsi_context *bld_base,
253 struct lp_build_emit_data *emit_data)
254 {
255 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
256 LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
257 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
258 floor_index, bld_base->base.int_elem_type , "");
259 }
260
261 static void emit_and(const struct lp_build_tgsi_action *action,
262 struct lp_build_tgsi_context *bld_base,
263 struct lp_build_emit_data *emit_data)
264 {
265 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
266 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
267 emit_data->args[0], emit_data->args[1], "");
268 }
269
270 static void emit_or(const struct lp_build_tgsi_action *action,
271 struct lp_build_tgsi_context *bld_base,
272 struct lp_build_emit_data *emit_data)
273 {
274 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
275 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
276 emit_data->args[0], emit_data->args[1], "");
277 }
278
279 static void emit_uadd(const struct lp_build_tgsi_action *action,
280 struct lp_build_tgsi_context *bld_base,
281 struct lp_build_emit_data *emit_data)
282 {
283 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
284 emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
285 emit_data->args[0], emit_data->args[1], "");
286 }
287
288 static void emit_udiv(const struct lp_build_tgsi_action *action,
289 struct lp_build_tgsi_context *bld_base,
290 struct lp_build_emit_data *emit_data)
291 {
292 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
293 emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
294 emit_data->args[0], emit_data->args[1], "");
295 }
296
297 static void emit_idiv(const struct lp_build_tgsi_action *action,
298 struct lp_build_tgsi_context *bld_base,
299 struct lp_build_emit_data *emit_data)
300 {
301 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
302 emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
303 emit_data->args[0], emit_data->args[1], "");
304 }
305
306 static void emit_mod(const struct lp_build_tgsi_action *action,
307 struct lp_build_tgsi_context *bld_base,
308 struct lp_build_emit_data *emit_data)
309 {
310 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
311 emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
312 emit_data->args[0], emit_data->args[1], "");
313 }
314
315 static void emit_umod(const struct lp_build_tgsi_action *action,
316 struct lp_build_tgsi_context *bld_base,
317 struct lp_build_emit_data *emit_data)
318 {
319 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
320 emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
321 emit_data->args[0], emit_data->args[1], "");
322 }
323
324 static void emit_shl(const struct lp_build_tgsi_action *action,
325 struct lp_build_tgsi_context *bld_base,
326 struct lp_build_emit_data *emit_data)
327 {
328 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
329 emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
330 emit_data->args[0], emit_data->args[1], "");
331 }
332
333 static void emit_ushr(const struct lp_build_tgsi_action *action,
334 struct lp_build_tgsi_context *bld_base,
335 struct lp_build_emit_data *emit_data)
336 {
337 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
338 emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
339 emit_data->args[0], emit_data->args[1], "");
340 }
341 static void emit_ishr(const struct lp_build_tgsi_action *action,
342 struct lp_build_tgsi_context *bld_base,
343 struct lp_build_emit_data *emit_data)
344 {
345 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
346 emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
347 emit_data->args[0], emit_data->args[1], "");
348 }
349
350 static void emit_xor(const struct lp_build_tgsi_action *action,
351 struct lp_build_tgsi_context *bld_base,
352 struct lp_build_emit_data *emit_data)
353 {
354 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
355 emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
356 emit_data->args[0], emit_data->args[1], "");
357 }
358
359 static void emit_ssg(const struct lp_build_tgsi_action *action,
360 struct lp_build_tgsi_context *bld_base,
361 struct lp_build_emit_data *emit_data)
362 {
363 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
364
365 LLVMValueRef cmp, val;
366
367 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
368 cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int64_bld.zero, "");
369 val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, emit_data->args[0], "");
370 cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int64_bld.zero, "");
371 val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), "");
372 } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
373 cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
374 val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
375 cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
376 val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
377 } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_DSSG) {
378 cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->dbl_bld.zero, "");
379 val = LLVMBuildSelect(builder, cmp, bld_base->dbl_bld.one, emit_data->args[0], "");
380 cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->dbl_bld.zero, "");
381 val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->dbl_bld.elem_type, -1), "");
382 } else { // float SSG
383 cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
384 val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
385 cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
386 val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
387 }
388
389 emit_data->output[emit_data->chan] = val;
390 }
391
392 static void emit_ineg(const struct lp_build_tgsi_action *action,
393 struct lp_build_tgsi_context *bld_base,
394 struct lp_build_emit_data *emit_data)
395 {
396 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
397 emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
398 emit_data->args[0], "");
399 }
400
401 static void emit_dneg(const struct lp_build_tgsi_action *action,
402 struct lp_build_tgsi_context *bld_base,
403 struct lp_build_emit_data *emit_data)
404 {
405 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
406 emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
407 emit_data->args[0], "");
408 }
409
410 static void emit_frac(const struct lp_build_tgsi_action *action,
411 struct lp_build_tgsi_context *bld_base,
412 struct lp_build_emit_data *emit_data)
413 {
414 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
415 char *intr;
416
417 if (emit_data->info->opcode == TGSI_OPCODE_FRC)
418 intr = "llvm.floor.f32";
419 else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
420 intr = "llvm.floor.f64";
421 else {
422 assert(0);
423 return;
424 }
425
426 LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
427 &emit_data->args[0], 1,
428 LP_FUNC_ATTR_READNONE);
429 emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
430 emit_data->args[0], floor, "");
431 }
432
433 static void emit_f2i(const struct lp_build_tgsi_action *action,
434 struct lp_build_tgsi_context *bld_base,
435 struct lp_build_emit_data *emit_data)
436 {
437 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
438 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
439 emit_data->args[0], bld_base->int_bld.elem_type, "");
440 }
441
442 static void emit_f2u(const struct lp_build_tgsi_action *action,
443 struct lp_build_tgsi_context *bld_base,
444 struct lp_build_emit_data *emit_data)
445 {
446 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
447 emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
448 emit_data->args[0], bld_base->uint_bld.elem_type, "");
449 }
450
451 static void emit_i2f(const struct lp_build_tgsi_action *action,
452 struct lp_build_tgsi_context *bld_base,
453 struct lp_build_emit_data *emit_data)
454 {
455 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
456 emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
457 emit_data->args[0], bld_base->base.elem_type, "");
458 }
459
460 static void emit_u2f(const struct lp_build_tgsi_action *action,
461 struct lp_build_tgsi_context *bld_base,
462 struct lp_build_emit_data *emit_data)
463 {
464 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
465 emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
466 emit_data->args[0], bld_base->base.elem_type, "");
467 }
468
469 static void
470 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
471 struct lp_build_tgsi_context *bld_base,
472 struct lp_build_emit_data *emit_data)
473 {
474 struct lp_build_context *base = &bld_base->base;
475 emit_data->output[emit_data->chan] =
476 lp_build_intrinsic(base->gallivm->builder, action->intr_name,
477 emit_data->dst_type, emit_data->args,
478 emit_data->arg_count, LP_FUNC_ATTR_READNONE);
479 }
480
481 static void emit_bfi(const struct lp_build_tgsi_action *action,
482 struct lp_build_tgsi_context *bld_base,
483 struct lp_build_emit_data *emit_data)
484 {
485 struct gallivm_state *gallivm = bld_base->base.gallivm;
486 LLVMBuilderRef builder = gallivm->builder;
487 LLVMValueRef bfi_args[3];
488 LLVMValueRef bfi_sm5;
489 LLVMValueRef cond;
490
491 // Calculate the bitmask: (((1 << src3) - 1) << src2
492 bfi_args[0] = LLVMBuildShl(builder,
493 LLVMBuildSub(builder,
494 LLVMBuildShl(builder,
495 bld_base->int_bld.one,
496 emit_data->args[3], ""),
497 bld_base->int_bld.one, ""),
498 emit_data->args[2], "");
499
500 bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
501 emit_data->args[2], "");
502
503 bfi_args[2] = emit_data->args[0];
504
505 /* Calculate:
506 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
507 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
508 */
509 bfi_sm5 =
510 LLVMBuildXor(builder, bfi_args[2],
511 LLVMBuildAnd(builder, bfi_args[0],
512 LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
513 ""), ""), "");
514
515 /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
516 * uses the convenient V_BFI lowering for the above, which follows SM5
517 * and disagrees with GLSL semantics when bits (src3) is 32.
518 */
519 cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
520 lp_build_const_int32(gallivm, 32), "");
521 emit_data->output[emit_data->chan] =
522 LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
523 }
524
525 static void emit_bfe(const struct lp_build_tgsi_action *action,
526 struct lp_build_tgsi_context *bld_base,
527 struct lp_build_emit_data *emit_data)
528 {
529 struct si_shader_context *ctx = si_shader_context(bld_base);
530 struct gallivm_state *gallivm = &ctx->gallivm;
531 LLVMBuilderRef builder = gallivm->builder;
532 LLVMValueRef bfe_sm5;
533 LLVMValueRef cond;
534
535 bfe_sm5 = ac_build_bfe(&ctx->ac, emit_data->args[0],
536 emit_data->args[1], emit_data->args[2],
537 emit_data->info->opcode == TGSI_OPCODE_IBFE);
538
539 /* Correct for GLSL semantics. */
540 cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
541 LLVMConstInt(ctx->i32, 32, 0), "");
542 emit_data->output[emit_data->chan] =
543 LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
544 }
545
546 /* this is ffs in C */
547 static void emit_lsb(const struct lp_build_tgsi_action *action,
548 struct lp_build_tgsi_context *bld_base,
549 struct lp_build_emit_data *emit_data)
550 {
551 struct gallivm_state *gallivm = bld_base->base.gallivm;
552 LLVMBuilderRef builder = gallivm->builder;
553 LLVMValueRef args[2] = {
554 emit_data->args[0],
555
556 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
557 * add special code to check for x=0. The reason is that
558 * the LLVM behavior for x=0 is different from what we
559 * need here. However, LLVM also assumes that ffs(x) is
560 * in [0, 31], but GLSL expects that ffs(0) = -1, so
561 * a conditional assignment to handle 0 is still required.
562 */
563 LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
564 };
565
566 LLVMValueRef lsb =
567 lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
568 emit_data->dst_type, args, ARRAY_SIZE(args),
569 LP_FUNC_ATTR_READNONE);
570
571 /* TODO: We need an intrinsic to skip this conditional. */
572 /* Check for zero: */
573 emit_data->output[emit_data->chan] =
574 LLVMBuildSelect(builder,
575 LLVMBuildICmp(builder, LLVMIntEQ, args[0],
576 bld_base->uint_bld.zero, ""),
577 lp_build_const_int32(gallivm, -1), lsb, "");
578 }
579
580 /* Find the last bit set. */
581 static void emit_umsb(const struct lp_build_tgsi_action *action,
582 struct lp_build_tgsi_context *bld_base,
583 struct lp_build_emit_data *emit_data)
584 {
585 struct si_shader_context *ctx = si_shader_context(bld_base);
586
587 emit_data->output[emit_data->chan] =
588 ac_build_umsb(&ctx->ac, emit_data->args[0], emit_data->dst_type);
589 }
590
591 /* Find the last bit opposite of the sign bit. */
592 static void emit_imsb(const struct lp_build_tgsi_action *action,
593 struct lp_build_tgsi_context *bld_base,
594 struct lp_build_emit_data *emit_data)
595 {
596 struct si_shader_context *ctx = si_shader_context(bld_base);
597 emit_data->output[emit_data->chan] =
598 ac_build_imsb(&ctx->ac, emit_data->args[0],
599 emit_data->dst_type);
600 }
601
602 static void emit_iabs(const struct lp_build_tgsi_action *action,
603 struct lp_build_tgsi_context *bld_base,
604 struct lp_build_emit_data *emit_data)
605 {
606 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
607
608 emit_data->output[emit_data->chan] =
609 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
610 emit_data->args[0],
611 LLVMBuildNeg(builder,
612 emit_data->args[0], ""));
613 }
614
615 static void emit_minmax_int(const struct lp_build_tgsi_action *action,
616 struct lp_build_tgsi_context *bld_base,
617 struct lp_build_emit_data *emit_data)
618 {
619 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
620 LLVMIntPredicate op;
621
622 switch (emit_data->info->opcode) {
623 default:
624 assert(0);
625 case TGSI_OPCODE_IMAX:
626 case TGSI_OPCODE_I64MAX:
627 op = LLVMIntSGT;
628 break;
629 case TGSI_OPCODE_IMIN:
630 case TGSI_OPCODE_I64MIN:
631 op = LLVMIntSLT;
632 break;
633 case TGSI_OPCODE_UMAX:
634 case TGSI_OPCODE_U64MAX:
635 op = LLVMIntUGT;
636 break;
637 case TGSI_OPCODE_UMIN:
638 case TGSI_OPCODE_U64MIN:
639 op = LLVMIntULT;
640 break;
641 }
642
643 emit_data->output[emit_data->chan] =
644 LLVMBuildSelect(builder,
645 LLVMBuildICmp(builder, op, emit_data->args[0],
646 emit_data->args[1], ""),
647 emit_data->args[0],
648 emit_data->args[1], "");
649 }
650
651 static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base,
652 struct lp_build_emit_data *emit_data)
653 {
654 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
655 0, TGSI_CHAN_X);
656 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
657 0, TGSI_CHAN_Y);
658 }
659
660 static void emit_pk2h(const struct lp_build_tgsi_action *action,
661 struct lp_build_tgsi_context *bld_base,
662 struct lp_build_emit_data *emit_data)
663 {
664 /* From the GLSL 4.50 spec:
665 * "The rounding mode cannot be set and is undefined."
666 *
667 * v_cvt_pkrtz_f16 rounds to zero, but it's fastest.
668 */
669 emit_data->output[emit_data->chan] =
670 ac_build_cvt_pkrtz_f16(&si_shader_context(bld_base)->ac,
671 emit_data->args);
672 }
673
674 static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base,
675 struct lp_build_emit_data *emit_data)
676 {
677 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
678 0, TGSI_CHAN_X);
679 }
680
681 static void emit_up2h(const struct lp_build_tgsi_action *action,
682 struct lp_build_tgsi_context *bld_base,
683 struct lp_build_emit_data *emit_data)
684 {
685 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
686 LLVMContextRef context = bld_base->base.gallivm->context;
687 struct lp_build_context *uint_bld = &bld_base->uint_bld;
688 LLVMTypeRef fp16, i16;
689 LLVMValueRef const16, input, val;
690 unsigned i;
691
692 fp16 = LLVMHalfTypeInContext(context);
693 i16 = LLVMInt16TypeInContext(context);
694 const16 = lp_build_const_int32(uint_bld->gallivm, 16);
695 input = emit_data->args[0];
696
697 for (i = 0; i < 2; i++) {
698 val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
699 val = LLVMBuildTrunc(builder, val, i16, "");
700 val = LLVMBuildBitCast(builder, val, fp16, "");
701 emit_data->output[i] =
702 LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
703 }
704 }
705
706 static void emit_fdiv(const struct lp_build_tgsi_action *action,
707 struct lp_build_tgsi_context *bld_base,
708 struct lp_build_emit_data *emit_data)
709 {
710 struct si_shader_context *ctx = si_shader_context(bld_base);
711
712 emit_data->output[emit_data->chan] =
713 LLVMBuildFDiv(ctx->gallivm.builder,
714 emit_data->args[0], emit_data->args[1], "");
715
716 /* Use v_rcp_f32 instead of precise division. */
717 if (!LLVMIsConstant(emit_data->output[emit_data->chan]))
718 LLVMSetMetadata(emit_data->output[emit_data->chan],
719 ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
720 }
721
722 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
723 * the target machine. f64 needs global unsafe math flags to get rsq. */
724 static void emit_rsq(const struct lp_build_tgsi_action *action,
725 struct lp_build_tgsi_context *bld_base,
726 struct lp_build_emit_data *emit_data)
727 {
728 LLVMValueRef sqrt =
729 lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
730 emit_data->args[0]);
731
732 emit_data->output[emit_data->chan] =
733 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
734 bld_base->base.one, sqrt);
735 }
736
737 void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
738 {
739 lp_set_default_actions(bld_base);
740
741 bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
742 bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
743 bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
744 bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
745 bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.bitreverse.i32";
746 bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
747 bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
748 bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
749 bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
750 bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
751 bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
752 bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
753 bld_base->op_actions[TGSI_OPCODE_DCEIL].emit = build_tgsi_intrinsic_nomem;
754 bld_base->op_actions[TGSI_OPCODE_DCEIL].intr_name = "llvm.ceil.f64";
755 bld_base->op_actions[TGSI_OPCODE_DFLR].emit = build_tgsi_intrinsic_nomem;
756 bld_base->op_actions[TGSI_OPCODE_DFLR].intr_name = "llvm.floor.f64";
757 bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
758 bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
759 bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
760 bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
761 bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
762 bld_base->op_actions[TGSI_OPCODE_DROUND].emit = build_tgsi_intrinsic_nomem;
763 bld_base->op_actions[TGSI_OPCODE_DROUND].intr_name = "llvm.rint.f64";
764 bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
765 bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
766 bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
767 bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
768 bld_base->op_actions[TGSI_OPCODE_DSSG].emit = emit_ssg;
769 bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
770 bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.amdgcn.rsq.f64";
771 bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
772 bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
773 bld_base->op_actions[TGSI_OPCODE_DTRUNC].emit = build_tgsi_intrinsic_nomem;
774 bld_base->op_actions[TGSI_OPCODE_DTRUNC].intr_name = "llvm.trunc.f64";
775 bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
776 bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
777 bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
778 bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
779 bld_base->op_actions[TGSI_OPCODE_FMA].emit =
780 bld_base->op_actions[TGSI_OPCODE_MAD].emit;
781 bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
782 bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
783 bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
784 bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
785 bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
786 bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
787 bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
788 bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
789 bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
790 bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
791 bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
792 bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
793 bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
794 bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
795 bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
796 bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
797 bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
798 bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
799 bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
800 bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
801 bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
802 bld_base->op_actions[TGSI_OPCODE_KILL].emit = kil_emit;
803 bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
804 bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
805 bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
806 bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
807 bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
808 bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
809 bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
810 bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
811 bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
812 bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
813 bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
814 bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
815 bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
816 bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
817 bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
818 bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
819 bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
820 bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
821 bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
822 bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
823 bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
824 bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
825 bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
826 bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
827 bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
828 bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
829 bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
830 bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
831 bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
832 bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
833 bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
834 bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
835 bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
836 bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
837 bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
838 bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
839 bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
840 bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
841 bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
842 bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
843 bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
844 bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
845 bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
846 bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
847 bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
848 bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
849 bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
850 bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
851 bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
852 bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
853
854 bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
855 bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
856 bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
857 bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
858 bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
859 bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
860 bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;
861
862 bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
863 bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
864 bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
865 bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
866 bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
867 bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;
868
869 bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
870 bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
871 bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
872 bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
873
874 bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
875 bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
876 bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
877 bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
878 }