radeonsi: update copyrights
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_tgsi_alu.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "si_shader_internal.h"
26 #include "gallivm/lp_bld_const.h"
27 #include "gallivm/lp_bld_intr.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "amd/common/ac_llvm_build.h"
31
32 static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
33 struct lp_build_emit_data *emit_data)
34 {
35 const struct tgsi_full_instruction *inst = emit_data->inst;
36 struct si_shader_context *ctx = si_shader_context(bld_base);
37 LLVMBuilderRef builder = ctx->ac.builder;
38 unsigned i;
39 LLVMValueRef conds[TGSI_NUM_CHANNELS];
40
41 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
42 LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
43 conds[i] = LLVMBuildFCmp(builder, LLVMRealOGE, value,
44 ctx->ac.f32_0, "");
45 }
46
47 /* And the conditions together */
48 for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
49 conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], "");
50 }
51
52 emit_data->dst_type = ctx->voidt;
53 emit_data->arg_count = 1;
54 emit_data->args[0] = conds[0];
55 }
56
57 void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
58 {
59 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
60 LLVMBuilderRef builder = ctx->ac.builder;
61
62 if (ctx->shader->selector->force_correct_derivs_after_kill) {
63 /* LLVM 6.0 can kill immediately while maintaining WQM. */
64 if (HAVE_LLVM >= 0x0600) {
65 ac_build_kill_if_false(&ctx->ac,
66 ac_build_wqm_vote(&ctx->ac, visible));
67 }
68
69 LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
70 mask = LLVMBuildAnd(builder, mask, visible, "");
71 LLVMBuildStore(builder, mask, ctx->postponed_kill);
72 return;
73 }
74
75 ac_build_kill_if_false(&ctx->ac, visible);
76 }
77
78 static void kil_emit(const struct lp_build_tgsi_action *action,
79 struct lp_build_tgsi_context *bld_base,
80 struct lp_build_emit_data *emit_data)
81 {
82 struct si_shader_context *ctx = si_shader_context(bld_base);
83 LLVMValueRef visible;
84
85 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
86 visible = emit_data->args[0];
87 } else {
88 assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
89 visible = LLVMConstInt(ctx->i1, false, 0);
90 }
91
92 si_llvm_emit_kill(&ctx->abi, visible);
93 }
94
95 static void emit_icmp(const struct lp_build_tgsi_action *action,
96 struct lp_build_tgsi_context *bld_base,
97 struct lp_build_emit_data *emit_data)
98 {
99 unsigned pred;
100 struct si_shader_context *ctx = si_shader_context(bld_base);
101
102 switch (emit_data->inst->Instruction.Opcode) {
103 case TGSI_OPCODE_USEQ:
104 case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
105 case TGSI_OPCODE_USNE:
106 case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
107 case TGSI_OPCODE_USGE:
108 case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
109 case TGSI_OPCODE_USLT:
110 case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
111 case TGSI_OPCODE_ISGE:
112 case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
113 case TGSI_OPCODE_ISLT:
114 case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
115 default:
116 assert(!"unknown instruction");
117 pred = 0;
118 break;
119 }
120
121 LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, pred,
122 emit_data->args[0], emit_data->args[1],"");
123
124 v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
125
126 emit_data->output[emit_data->chan] = v;
127 }
128
129 static void emit_ucmp(const struct lp_build_tgsi_action *action,
130 struct lp_build_tgsi_context *bld_base,
131 struct lp_build_emit_data *emit_data)
132 {
133 struct si_shader_context *ctx = si_shader_context(bld_base);
134 LLVMValueRef arg0 = ac_to_integer(&ctx->ac, emit_data->args[0]);
135
136 LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, arg0,
137 ctx->i32_0, "");
138
139 emit_data->output[emit_data->chan] =
140 LLVMBuildSelect(ctx->ac.builder, v, emit_data->args[1], emit_data->args[2], "");
141 }
142
143 static void emit_cmp(const struct lp_build_tgsi_action *action,
144 struct lp_build_tgsi_context *bld_base,
145 struct lp_build_emit_data *emit_data)
146 {
147 struct si_shader_context *ctx = si_shader_context(bld_base);
148 LLVMValueRef cond, *args = emit_data->args;
149
150 cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealOLT, args[0],
151 ctx->ac.f32_0, "");
152
153 emit_data->output[emit_data->chan] =
154 LLVMBuildSelect(ctx->ac.builder, cond, args[1], args[2], "");
155 }
156
157 static void emit_set_cond(const struct lp_build_tgsi_action *action,
158 struct lp_build_tgsi_context *bld_base,
159 struct lp_build_emit_data *emit_data)
160 {
161 struct si_shader_context *ctx = si_shader_context(bld_base);
162 LLVMRealPredicate pred;
163 LLVMValueRef cond;
164
165 /* Use ordered for everything but NE (which is usual for
166 * float comparisons)
167 */
168 switch (emit_data->inst->Instruction.Opcode) {
169 case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
170 case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
171 case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
172 case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
173 case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
174 case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
175 default: assert(!"unknown instruction"); pred = 0; break;
176 }
177
178 cond = LLVMBuildFCmp(ctx->ac.builder,
179 pred, emit_data->args[0], emit_data->args[1], "");
180
181 emit_data->output[emit_data->chan] = LLVMBuildSelect(ctx->ac.builder,
182 cond, ctx->ac.f32_1, ctx->ac.f32_0, "");
183 }
184
185 static void emit_fcmp(const struct lp_build_tgsi_action *action,
186 struct lp_build_tgsi_context *bld_base,
187 struct lp_build_emit_data *emit_data)
188 {
189 struct si_shader_context *ctx = si_shader_context(bld_base);
190 LLVMRealPredicate pred;
191
192 /* Use ordered for everything but NE (which is usual for
193 * float comparisons)
194 */
195 switch (emit_data->inst->Instruction.Opcode) {
196 case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
197 case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
198 case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
199 case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
200 default: assert(!"unknown instruction"); pred = 0; break;
201 }
202
203 LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
204 emit_data->args[0], emit_data->args[1],"");
205
206 v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
207
208 emit_data->output[emit_data->chan] = v;
209 }
210
211 static void emit_dcmp(const struct lp_build_tgsi_action *action,
212 struct lp_build_tgsi_context *bld_base,
213 struct lp_build_emit_data *emit_data)
214 {
215 struct si_shader_context *ctx = si_shader_context(bld_base);
216 LLVMRealPredicate pred;
217
218 /* Use ordered for everything but NE (which is usual for
219 * float comparisons)
220 */
221 switch (emit_data->inst->Instruction.Opcode) {
222 case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
223 case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
224 case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
225 case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
226 default: assert(!"unknown instruction"); pred = 0; break;
227 }
228
229 LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
230 emit_data->args[0], emit_data->args[1],"");
231
232 v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
233
234 emit_data->output[emit_data->chan] = v;
235 }
236
237 static void emit_not(const struct lp_build_tgsi_action *action,
238 struct lp_build_tgsi_context *bld_base,
239 struct lp_build_emit_data *emit_data)
240 {
241 struct si_shader_context *ctx = si_shader_context(bld_base);
242 LLVMValueRef v = ac_to_integer(&ctx->ac, emit_data->args[0]);
243 emit_data->output[emit_data->chan] = LLVMBuildNot(ctx->ac.builder, v, "");
244 }
245
246 static void emit_arl(const struct lp_build_tgsi_action *action,
247 struct lp_build_tgsi_context *bld_base,
248 struct lp_build_emit_data *emit_data)
249 {
250 struct si_shader_context *ctx = si_shader_context(bld_base);
251 LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
252 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
253 floor_index, ctx->i32, "");
254 }
255
256 static void emit_and(const struct lp_build_tgsi_action *action,
257 struct lp_build_tgsi_context *bld_base,
258 struct lp_build_emit_data *emit_data)
259 {
260 struct si_shader_context *ctx = si_shader_context(bld_base);
261 emit_data->output[emit_data->chan] = LLVMBuildAnd(ctx->ac.builder,
262 emit_data->args[0], emit_data->args[1], "");
263 }
264
265 static void emit_or(const struct lp_build_tgsi_action *action,
266 struct lp_build_tgsi_context *bld_base,
267 struct lp_build_emit_data *emit_data)
268 {
269 struct si_shader_context *ctx = si_shader_context(bld_base);
270 emit_data->output[emit_data->chan] = LLVMBuildOr(ctx->ac.builder,
271 emit_data->args[0], emit_data->args[1], "");
272 }
273
274 static void emit_uadd(const struct lp_build_tgsi_action *action,
275 struct lp_build_tgsi_context *bld_base,
276 struct lp_build_emit_data *emit_data)
277 {
278 struct si_shader_context *ctx = si_shader_context(bld_base);
279 emit_data->output[emit_data->chan] = LLVMBuildAdd(ctx->ac.builder,
280 emit_data->args[0], emit_data->args[1], "");
281 }
282
283 static void emit_udiv(const struct lp_build_tgsi_action *action,
284 struct lp_build_tgsi_context *bld_base,
285 struct lp_build_emit_data *emit_data)
286 {
287 struct si_shader_context *ctx = si_shader_context(bld_base);
288 emit_data->output[emit_data->chan] = LLVMBuildUDiv(ctx->ac.builder,
289 emit_data->args[0], emit_data->args[1], "");
290 }
291
292 static void emit_idiv(const struct lp_build_tgsi_action *action,
293 struct lp_build_tgsi_context *bld_base,
294 struct lp_build_emit_data *emit_data)
295 {
296 struct si_shader_context *ctx = si_shader_context(bld_base);
297 emit_data->output[emit_data->chan] = LLVMBuildSDiv(ctx->ac.builder,
298 emit_data->args[0], emit_data->args[1], "");
299 }
300
301 static void emit_mod(const struct lp_build_tgsi_action *action,
302 struct lp_build_tgsi_context *bld_base,
303 struct lp_build_emit_data *emit_data)
304 {
305 struct si_shader_context *ctx = si_shader_context(bld_base);
306 emit_data->output[emit_data->chan] = LLVMBuildSRem(ctx->ac.builder,
307 emit_data->args[0], emit_data->args[1], "");
308 }
309
310 static void emit_umod(const struct lp_build_tgsi_action *action,
311 struct lp_build_tgsi_context *bld_base,
312 struct lp_build_emit_data *emit_data)
313 {
314 struct si_shader_context *ctx = si_shader_context(bld_base);
315 emit_data->output[emit_data->chan] = LLVMBuildURem(ctx->ac.builder,
316 emit_data->args[0], emit_data->args[1], "");
317 }
318
319 static void emit_shl(const struct lp_build_tgsi_action *action,
320 struct lp_build_tgsi_context *bld_base,
321 struct lp_build_emit_data *emit_data)
322 {
323 struct si_shader_context *ctx = si_shader_context(bld_base);
324 emit_data->output[emit_data->chan] = LLVMBuildShl(ctx->ac.builder,
325 emit_data->args[0], emit_data->args[1], "");
326 }
327
328 static void emit_ushr(const struct lp_build_tgsi_action *action,
329 struct lp_build_tgsi_context *bld_base,
330 struct lp_build_emit_data *emit_data)
331 {
332 struct si_shader_context *ctx = si_shader_context(bld_base);
333 emit_data->output[emit_data->chan] = LLVMBuildLShr(ctx->ac.builder,
334 emit_data->args[0], emit_data->args[1], "");
335 }
336 static void emit_ishr(const struct lp_build_tgsi_action *action,
337 struct lp_build_tgsi_context *bld_base,
338 struct lp_build_emit_data *emit_data)
339 {
340 struct si_shader_context *ctx = si_shader_context(bld_base);
341 emit_data->output[emit_data->chan] = LLVMBuildAShr(ctx->ac.builder,
342 emit_data->args[0], emit_data->args[1], "");
343 }
344
345 static void emit_xor(const struct lp_build_tgsi_action *action,
346 struct lp_build_tgsi_context *bld_base,
347 struct lp_build_emit_data *emit_data)
348 {
349 struct si_shader_context *ctx = si_shader_context(bld_base);
350 emit_data->output[emit_data->chan] = LLVMBuildXor(ctx->ac.builder,
351 emit_data->args[0], emit_data->args[1], "");
352 }
353
354 static void emit_ssg(const struct lp_build_tgsi_action *action,
355 struct lp_build_tgsi_context *bld_base,
356 struct lp_build_emit_data *emit_data)
357 {
358 struct si_shader_context *ctx = si_shader_context(bld_base);
359
360 LLVMValueRef val;
361
362 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
363 val = ac_build_isign(&ctx->ac, emit_data->args[0], 64);
364 } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
365 val = ac_build_isign(&ctx->ac, emit_data->args[0], 32);
366 } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_DSSG) {
367 val = ac_build_fsign(&ctx->ac, emit_data->args[0], 64);
368 } else {
369 val = ac_build_fsign(&ctx->ac, emit_data->args[0], 32);
370 }
371
372 emit_data->output[emit_data->chan] = val;
373 }
374
375 static void emit_ineg(const struct lp_build_tgsi_action *action,
376 struct lp_build_tgsi_context *bld_base,
377 struct lp_build_emit_data *emit_data)
378 {
379 struct si_shader_context *ctx = si_shader_context(bld_base);
380 emit_data->output[emit_data->chan] = LLVMBuildNeg(ctx->ac.builder,
381 emit_data->args[0], "");
382 }
383
384 static void emit_dneg(const struct lp_build_tgsi_action *action,
385 struct lp_build_tgsi_context *bld_base,
386 struct lp_build_emit_data *emit_data)
387 {
388 struct si_shader_context *ctx = si_shader_context(bld_base);
389 emit_data->output[emit_data->chan] = LLVMBuildFNeg(ctx->ac.builder,
390 emit_data->args[0], "");
391 }
392
393 static void emit_frac(const struct lp_build_tgsi_action *action,
394 struct lp_build_tgsi_context *bld_base,
395 struct lp_build_emit_data *emit_data)
396 {
397 struct si_shader_context *ctx = si_shader_context(bld_base);
398 unsigned bitsize;
399
400 if (emit_data->info->opcode == TGSI_OPCODE_FRC)
401 bitsize = 32;
402 else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
403 bitsize = 64;
404 else {
405 assert(0);
406 return;
407 }
408
409 emit_data->output[emit_data->chan] =
410 ac_build_fract(&ctx->ac, emit_data->args[0], bitsize);
411 }
412
413 static void emit_f2i(const struct lp_build_tgsi_action *action,
414 struct lp_build_tgsi_context *bld_base,
415 struct lp_build_emit_data *emit_data)
416 {
417 struct si_shader_context *ctx = si_shader_context(bld_base);
418 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
419 emit_data->args[0], ctx->i32, "");
420 }
421
422 static void emit_f2u(const struct lp_build_tgsi_action *action,
423 struct lp_build_tgsi_context *bld_base,
424 struct lp_build_emit_data *emit_data)
425 {
426 struct si_shader_context *ctx = si_shader_context(bld_base);
427 emit_data->output[emit_data->chan] = LLVMBuildFPToUI(ctx->ac.builder,
428 emit_data->args[0], ctx->i32, "");
429 }
430
431 static void emit_i2f(const struct lp_build_tgsi_action *action,
432 struct lp_build_tgsi_context *bld_base,
433 struct lp_build_emit_data *emit_data)
434 {
435 struct si_shader_context *ctx = si_shader_context(bld_base);
436 emit_data->output[emit_data->chan] = LLVMBuildSIToFP(ctx->ac.builder,
437 emit_data->args[0], ctx->f32, "");
438 }
439
440 static void emit_u2f(const struct lp_build_tgsi_action *action,
441 struct lp_build_tgsi_context *bld_base,
442 struct lp_build_emit_data *emit_data)
443 {
444 struct si_shader_context *ctx = si_shader_context(bld_base);
445 emit_data->output[emit_data->chan] = LLVMBuildUIToFP(ctx->ac.builder,
446 emit_data->args[0], ctx->f32, "");
447 }
448
449 static void
450 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
451 struct lp_build_tgsi_context *bld_base,
452 struct lp_build_emit_data *emit_data)
453 {
454 struct si_shader_context *ctx = si_shader_context(bld_base);
455 emit_data->output[emit_data->chan] =
456 lp_build_intrinsic(ctx->ac.builder, action->intr_name,
457 emit_data->dst_type, emit_data->args,
458 emit_data->arg_count, LP_FUNC_ATTR_READNONE);
459 }
460
461 static void emit_bfi(const struct lp_build_tgsi_action *action,
462 struct lp_build_tgsi_context *bld_base,
463 struct lp_build_emit_data *emit_data)
464 {
465 struct si_shader_context *ctx = si_shader_context(bld_base);
466 LLVMBuilderRef builder = ctx->ac.builder;
467 LLVMValueRef bfi_args[3];
468 LLVMValueRef bfi_sm5;
469 LLVMValueRef cond;
470
471 // Calculate the bitmask: (((1 << src3) - 1) << src2
472 bfi_args[0] = LLVMBuildShl(builder,
473 LLVMBuildSub(builder,
474 LLVMBuildShl(builder,
475 ctx->i32_1,
476 emit_data->args[3], ""),
477 ctx->i32_1, ""),
478 emit_data->args[2], "");
479
480 bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
481 emit_data->args[2], "");
482
483 bfi_args[2] = emit_data->args[0];
484
485 /* Calculate:
486 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
487 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
488 */
489 bfi_sm5 =
490 LLVMBuildXor(builder, bfi_args[2],
491 LLVMBuildAnd(builder, bfi_args[0],
492 LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
493 ""), ""), "");
494
495 /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
496 * uses the convenient V_BFI lowering for the above, which follows SM5
497 * and disagrees with GLSL semantics when bits (src3) is 32.
498 */
499 cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
500 LLVMConstInt(ctx->i32, 32, 0), "");
501 emit_data->output[emit_data->chan] =
502 LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
503 }
504
505 static void emit_bfe(const struct lp_build_tgsi_action *action,
506 struct lp_build_tgsi_context *bld_base,
507 struct lp_build_emit_data *emit_data)
508 {
509 struct si_shader_context *ctx = si_shader_context(bld_base);
510 LLVMValueRef bfe_sm5;
511 LLVMValueRef cond;
512
513 bfe_sm5 = ac_build_bfe(&ctx->ac, emit_data->args[0],
514 emit_data->args[1], emit_data->args[2],
515 emit_data->info->opcode == TGSI_OPCODE_IBFE);
516
517 /* Correct for GLSL semantics. */
518 cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
519 LLVMConstInt(ctx->i32, 32, 0), "");
520 emit_data->output[emit_data->chan] =
521 LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
522 }
523
524 /* this is ffs in C */
525 static void emit_lsb(const struct lp_build_tgsi_action *action,
526 struct lp_build_tgsi_context *bld_base,
527 struct lp_build_emit_data *emit_data)
528 {
529 struct si_shader_context *ctx = si_shader_context(bld_base);
530
531 emit_data->output[emit_data->chan] = ac_find_lsb(&ctx->ac, emit_data->dst_type, emit_data->args[0]);
532 }
533
534 /* Find the last bit set. */
535 static void emit_umsb(const struct lp_build_tgsi_action *action,
536 struct lp_build_tgsi_context *bld_base,
537 struct lp_build_emit_data *emit_data)
538 {
539 struct si_shader_context *ctx = si_shader_context(bld_base);
540
541 emit_data->output[emit_data->chan] =
542 ac_build_umsb(&ctx->ac, emit_data->args[0], emit_data->dst_type);
543 }
544
545 /* Find the last bit opposite of the sign bit. */
546 static void emit_imsb(const struct lp_build_tgsi_action *action,
547 struct lp_build_tgsi_context *bld_base,
548 struct lp_build_emit_data *emit_data)
549 {
550 struct si_shader_context *ctx = si_shader_context(bld_base);
551 emit_data->output[emit_data->chan] =
552 ac_build_imsb(&ctx->ac, emit_data->args[0],
553 emit_data->dst_type);
554 }
555
556 static void emit_iabs(const struct lp_build_tgsi_action *action,
557 struct lp_build_tgsi_context *bld_base,
558 struct lp_build_emit_data *emit_data)
559 {
560 struct si_shader_context *ctx = si_shader_context(bld_base);
561
562 emit_data->output[emit_data->chan] =
563 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
564 emit_data->args[0],
565 LLVMBuildNeg(ctx->ac.builder,
566 emit_data->args[0], ""));
567 }
568
569 static void emit_minmax_int(const struct lp_build_tgsi_action *action,
570 struct lp_build_tgsi_context *bld_base,
571 struct lp_build_emit_data *emit_data)
572 {
573 struct si_shader_context *ctx = si_shader_context(bld_base);
574 LLVMIntPredicate op;
575
576 switch (emit_data->info->opcode) {
577 default:
578 assert(0);
579 case TGSI_OPCODE_IMAX:
580 case TGSI_OPCODE_I64MAX:
581 op = LLVMIntSGT;
582 break;
583 case TGSI_OPCODE_IMIN:
584 case TGSI_OPCODE_I64MIN:
585 op = LLVMIntSLT;
586 break;
587 case TGSI_OPCODE_UMAX:
588 case TGSI_OPCODE_U64MAX:
589 op = LLVMIntUGT;
590 break;
591 case TGSI_OPCODE_UMIN:
592 case TGSI_OPCODE_U64MIN:
593 op = LLVMIntULT;
594 break;
595 }
596
597 emit_data->output[emit_data->chan] =
598 LLVMBuildSelect(ctx->ac.builder,
599 LLVMBuildICmp(ctx->ac.builder, op, emit_data->args[0],
600 emit_data->args[1], ""),
601 emit_data->args[0],
602 emit_data->args[1], "");
603 }
604
605 static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base,
606 struct lp_build_emit_data *emit_data)
607 {
608 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
609 0, TGSI_CHAN_X);
610 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
611 0, TGSI_CHAN_Y);
612 }
613
614 static void emit_pk2h(const struct lp_build_tgsi_action *action,
615 struct lp_build_tgsi_context *bld_base,
616 struct lp_build_emit_data *emit_data)
617 {
618 /* From the GLSL 4.50 spec:
619 * "The rounding mode cannot be set and is undefined."
620 *
621 * v_cvt_pkrtz_f16 rounds to zero, but it's fastest.
622 */
623 emit_data->output[emit_data->chan] =
624 ac_build_cvt_pkrtz_f16(&si_shader_context(bld_base)->ac,
625 emit_data->args);
626 }
627
628 static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base,
629 struct lp_build_emit_data *emit_data)
630 {
631 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
632 0, TGSI_CHAN_X);
633 }
634
635 static void emit_up2h(const struct lp_build_tgsi_action *action,
636 struct lp_build_tgsi_context *bld_base,
637 struct lp_build_emit_data *emit_data)
638 {
639 struct si_shader_context *ctx = si_shader_context(bld_base);
640 LLVMTypeRef i16;
641 LLVMValueRef const16, input, val;
642 unsigned i;
643
644 i16 = LLVMInt16TypeInContext(ctx->ac.context);
645 const16 = LLVMConstInt(ctx->i32, 16, 0);
646 input = emit_data->args[0];
647
648 for (i = 0; i < 2; i++) {
649 val = i == 1 ? LLVMBuildLShr(ctx->ac.builder, input, const16, "") : input;
650 val = LLVMBuildTrunc(ctx->ac.builder, val, i16, "");
651 val = ac_to_float(&ctx->ac, val);
652 emit_data->output[i] = LLVMBuildFPExt(ctx->ac.builder, val, ctx->f32, "");
653 }
654 }
655
656 static void emit_fdiv(const struct lp_build_tgsi_action *action,
657 struct lp_build_tgsi_context *bld_base,
658 struct lp_build_emit_data *emit_data)
659 {
660 struct si_shader_context *ctx = si_shader_context(bld_base);
661
662 emit_data->output[emit_data->chan] =
663 ac_build_fdiv(&ctx->ac, emit_data->args[0], emit_data->args[1]);
664 }
665
666 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
667 * the target machine. f64 needs global unsafe math flags to get rsq. */
668 static void emit_rsq(const struct lp_build_tgsi_action *action,
669 struct lp_build_tgsi_context *bld_base,
670 struct lp_build_emit_data *emit_data)
671 {
672 struct si_shader_context *ctx = si_shader_context(bld_base);
673
674 LLVMValueRef sqrt =
675 lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
676 emit_data->args[0]);
677
678 emit_data->output[emit_data->chan] =
679 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
680 ctx->ac.f32_1, sqrt);
681 }
682
683 static void dfracexp_fetch_args(struct lp_build_tgsi_context *bld_base,
684 struct lp_build_emit_data *emit_data)
685 {
686 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
687 emit_data->arg_count = 1;
688 }
689
690 static void dfracexp_emit(const struct lp_build_tgsi_action *action,
691 struct lp_build_tgsi_context *bld_base,
692 struct lp_build_emit_data *emit_data)
693 {
694 struct si_shader_context *ctx = si_shader_context(bld_base);
695
696 emit_data->output[emit_data->chan] =
697 lp_build_intrinsic(ctx->ac.builder, "llvm.amdgcn.frexp.mant.f64",
698 ctx->ac.f64, &emit_data->args[0], 1, 0);
699 emit_data->output1[emit_data->chan] =
700 lp_build_intrinsic(ctx->ac.builder, "llvm.amdgcn.frexp.exp.i32.f64",
701 ctx->ac.i32, &emit_data->args[0], 1, 0);
702 }
703
704 void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
705 {
706 lp_set_default_actions(bld_base);
707
708 bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
709 bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
710 bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
711 bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
712 bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.bitreverse.i32";
713 bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
714 bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
715 bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
716 bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
717 bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
718 bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
719 bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
720 bld_base->op_actions[TGSI_OPCODE_DCEIL].emit = build_tgsi_intrinsic_nomem;
721 bld_base->op_actions[TGSI_OPCODE_DCEIL].intr_name = "llvm.ceil.f64";
722 bld_base->op_actions[TGSI_OPCODE_DFLR].emit = build_tgsi_intrinsic_nomem;
723 bld_base->op_actions[TGSI_OPCODE_DFLR].intr_name = "llvm.floor.f64";
724 bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
725 bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
726 bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
727 bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
728 bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
729 bld_base->op_actions[TGSI_OPCODE_DROUND].emit = build_tgsi_intrinsic_nomem;
730 bld_base->op_actions[TGSI_OPCODE_DROUND].intr_name = "llvm.rint.f64";
731 bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
732 bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
733 bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
734 bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
735 bld_base->op_actions[TGSI_OPCODE_DSSG].emit = emit_ssg;
736 bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
737 bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.amdgcn.rsq.f64";
738 bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
739 bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
740 bld_base->op_actions[TGSI_OPCODE_DTRUNC].emit = build_tgsi_intrinsic_nomem;
741 bld_base->op_actions[TGSI_OPCODE_DTRUNC].intr_name = "llvm.trunc.f64";
742 bld_base->op_actions[TGSI_OPCODE_DFRACEXP].fetch_args = dfracexp_fetch_args;
743 bld_base->op_actions[TGSI_OPCODE_DFRACEXP].emit = dfracexp_emit;
744 bld_base->op_actions[TGSI_OPCODE_DLDEXP].emit = build_tgsi_intrinsic_nomem;
745 bld_base->op_actions[TGSI_OPCODE_DLDEXP].intr_name = "llvm.amdgcn.ldexp.f64";
746 bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
747 bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
748 bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
749 bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
750 bld_base->op_actions[TGSI_OPCODE_FMA].emit =
751 bld_base->op_actions[TGSI_OPCODE_MAD].emit;
752 bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
753 bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
754 bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
755 bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
756 bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
757 bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
758 bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
759 bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
760 bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
761 bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
762 bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
763 bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
764 bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
765 bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
766 bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
767 bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
768 bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
769 bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
770 bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
771 bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
772 bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
773 bld_base->op_actions[TGSI_OPCODE_KILL].emit = kil_emit;
774 bld_base->op_actions[TGSI_OPCODE_LDEXP].emit = build_tgsi_intrinsic_nomem;
775 bld_base->op_actions[TGSI_OPCODE_LDEXP].intr_name = "llvm.amdgcn.ldexp.f32";
776 bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
777 bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
778 bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
779 bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
780 bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
781 bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
782 bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
783 bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
784 bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
785 bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
786 bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
787 bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
788 bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
789 bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
790 bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
791 bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
792 bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
793 bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
794 bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
795 bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
796 bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
797 bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
798 bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
799 bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
800 bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
801 bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
802 bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
803 bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
804 bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
805 bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
806 bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
807 bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
808 bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
809 bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
810 bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
811 bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
812 bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
813 bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
814 bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
815 bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
816 bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
817 bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
818 bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
819 bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
820 bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
821 bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
822 bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
823 bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
824 bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
825 bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
826
827 bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
828 bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
829 bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
830 bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
831 bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
832 bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
833 bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;
834
835 bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
836 bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
837 bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
838 bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
839 bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
840 bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;
841
842 bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
843 bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
844 bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
845 bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
846
847 bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
848 bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
849 bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
850 bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
851 }