2 * Copyright 2016 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "si_shader_internal.h"
27 #include "ac_llvm_util.h"
29 void si_llvm_emit_kill(struct ac_shader_abi
*abi
, LLVMValueRef visible
)
31 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
32 LLVMBuilderRef builder
= ctx
->ac
.builder
;
34 if (ctx
->shader
->selector
->force_correct_derivs_after_kill
) {
35 /* Kill immediately while maintaining WQM. */
36 ac_build_kill_if_false(&ctx
->ac
,
37 ac_build_wqm_vote(&ctx
->ac
, visible
));
39 LLVMValueRef mask
= LLVMBuildLoad(builder
, ctx
->postponed_kill
, "");
40 mask
= LLVMBuildAnd(builder
, mask
, visible
, "");
41 LLVMBuildStore(builder
, mask
, ctx
->postponed_kill
);
45 ac_build_kill_if_false(&ctx
->ac
, visible
);
48 static void kil_emit(const struct lp_build_tgsi_action
*action
,
49 struct lp_build_tgsi_context
*bld_base
,
50 struct lp_build_emit_data
*emit_data
)
52 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
55 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_KILL_IF
) {
56 const struct tgsi_full_instruction
*inst
= emit_data
->inst
;
57 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
58 LLVMBuilderRef builder
= ctx
->ac
.builder
;
60 LLVMValueRef conds
[TGSI_NUM_CHANNELS
];
62 for (i
= 0; i
< TGSI_NUM_CHANNELS
; i
++) {
63 LLVMValueRef value
= lp_build_emit_fetch(bld_base
, inst
, 0, i
);
64 /* UGE because NaN shouldn't get killed */
65 conds
[i
] = LLVMBuildFCmp(builder
, LLVMRealUGE
, value
,
69 /* And the conditions together */
70 for (i
= TGSI_NUM_CHANNELS
- 1; i
> 0; i
--) {
71 conds
[i
- 1] = LLVMBuildAnd(builder
, conds
[i
], conds
[i
- 1], "");
75 assert(emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_KILL
);
76 visible
= ctx
->i1false
;
79 si_llvm_emit_kill(&ctx
->abi
, visible
);
82 static void emit_icmp(const struct lp_build_tgsi_action
*action
,
83 struct lp_build_tgsi_context
*bld_base
,
84 struct lp_build_emit_data
*emit_data
)
87 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
89 switch (emit_data
->inst
->Instruction
.Opcode
) {
90 case TGSI_OPCODE_USEQ
:
91 case TGSI_OPCODE_U64SEQ
: pred
= LLVMIntEQ
; break;
92 case TGSI_OPCODE_USNE
:
93 case TGSI_OPCODE_U64SNE
: pred
= LLVMIntNE
; break;
94 case TGSI_OPCODE_USGE
:
95 case TGSI_OPCODE_U64SGE
: pred
= LLVMIntUGE
; break;
96 case TGSI_OPCODE_USLT
:
97 case TGSI_OPCODE_U64SLT
: pred
= LLVMIntULT
; break;
98 case TGSI_OPCODE_ISGE
:
99 case TGSI_OPCODE_I64SGE
: pred
= LLVMIntSGE
; break;
100 case TGSI_OPCODE_ISLT
:
101 case TGSI_OPCODE_I64SLT
: pred
= LLVMIntSLT
; break;
103 assert(!"unknown instruction");
108 LLVMValueRef v
= LLVMBuildICmp(ctx
->ac
.builder
, pred
,
109 emit_data
->args
[0], emit_data
->args
[1],"");
111 v
= LLVMBuildSExtOrBitCast(ctx
->ac
.builder
, v
, ctx
->i32
, "");
113 emit_data
->output
[emit_data
->chan
] = v
;
116 static void emit_ucmp(const struct lp_build_tgsi_action
*action
,
117 struct lp_build_tgsi_context
*bld_base
,
118 struct lp_build_emit_data
*emit_data
)
120 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
121 LLVMValueRef arg0
= ac_to_integer(&ctx
->ac
, emit_data
->args
[0]);
123 LLVMValueRef v
= LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntNE
, arg0
,
126 emit_data
->output
[emit_data
->chan
] =
127 LLVMBuildSelect(ctx
->ac
.builder
, v
, emit_data
->args
[1], emit_data
->args
[2], "");
130 static void emit_cmp(const struct lp_build_tgsi_action
*action
,
131 struct lp_build_tgsi_context
*bld_base
,
132 struct lp_build_emit_data
*emit_data
)
134 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
135 LLVMValueRef cond
, *args
= emit_data
->args
;
137 cond
= LLVMBuildFCmp(ctx
->ac
.builder
, LLVMRealOLT
, args
[0],
140 emit_data
->output
[emit_data
->chan
] =
141 LLVMBuildSelect(ctx
->ac
.builder
, cond
, args
[1], args
[2], "");
144 static void emit_set_cond(const struct lp_build_tgsi_action
*action
,
145 struct lp_build_tgsi_context
*bld_base
,
146 struct lp_build_emit_data
*emit_data
)
148 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
149 LLVMRealPredicate pred
;
152 /* Use ordered for everything but NE (which is usual for
155 switch (emit_data
->inst
->Instruction
.Opcode
) {
156 case TGSI_OPCODE_SGE
: pred
= LLVMRealOGE
; break;
157 case TGSI_OPCODE_SEQ
: pred
= LLVMRealOEQ
; break;
158 case TGSI_OPCODE_SLE
: pred
= LLVMRealOLE
; break;
159 case TGSI_OPCODE_SLT
: pred
= LLVMRealOLT
; break;
160 case TGSI_OPCODE_SNE
: pred
= LLVMRealUNE
; break;
161 case TGSI_OPCODE_SGT
: pred
= LLVMRealOGT
; break;
162 default: assert(!"unknown instruction"); pred
= 0; break;
165 cond
= LLVMBuildFCmp(ctx
->ac
.builder
,
166 pred
, emit_data
->args
[0], emit_data
->args
[1], "");
168 emit_data
->output
[emit_data
->chan
] = LLVMBuildSelect(ctx
->ac
.builder
,
169 cond
, ctx
->ac
.f32_1
, ctx
->ac
.f32_0
, "");
172 static void emit_fcmp(const struct lp_build_tgsi_action
*action
,
173 struct lp_build_tgsi_context
*bld_base
,
174 struct lp_build_emit_data
*emit_data
)
176 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
177 LLVMRealPredicate pred
;
179 /* Use ordered for everything but NE (which is usual for
182 switch (emit_data
->inst
->Instruction
.Opcode
) {
183 case TGSI_OPCODE_FSEQ
: pred
= LLVMRealOEQ
; break;
184 case TGSI_OPCODE_FSGE
: pred
= LLVMRealOGE
; break;
185 case TGSI_OPCODE_FSLT
: pred
= LLVMRealOLT
; break;
186 case TGSI_OPCODE_FSNE
: pred
= LLVMRealUNE
; break;
187 default: assert(!"unknown instruction"); pred
= 0; break;
190 LLVMValueRef v
= LLVMBuildFCmp(ctx
->ac
.builder
, pred
,
191 emit_data
->args
[0], emit_data
->args
[1],"");
193 v
= LLVMBuildSExtOrBitCast(ctx
->ac
.builder
, v
, ctx
->i32
, "");
195 emit_data
->output
[emit_data
->chan
] = v
;
198 static void emit_dcmp(const struct lp_build_tgsi_action
*action
,
199 struct lp_build_tgsi_context
*bld_base
,
200 struct lp_build_emit_data
*emit_data
)
202 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
203 LLVMRealPredicate pred
;
205 /* Use ordered for everything but NE (which is usual for
208 switch (emit_data
->inst
->Instruction
.Opcode
) {
209 case TGSI_OPCODE_DSEQ
: pred
= LLVMRealOEQ
; break;
210 case TGSI_OPCODE_DSGE
: pred
= LLVMRealOGE
; break;
211 case TGSI_OPCODE_DSLT
: pred
= LLVMRealOLT
; break;
212 case TGSI_OPCODE_DSNE
: pred
= LLVMRealUNE
; break;
213 default: assert(!"unknown instruction"); pred
= 0; break;
216 LLVMValueRef v
= LLVMBuildFCmp(ctx
->ac
.builder
, pred
,
217 emit_data
->args
[0], emit_data
->args
[1],"");
219 v
= LLVMBuildSExtOrBitCast(ctx
->ac
.builder
, v
, ctx
->i32
, "");
221 emit_data
->output
[emit_data
->chan
] = v
;
224 static void emit_not(const struct lp_build_tgsi_action
*action
,
225 struct lp_build_tgsi_context
*bld_base
,
226 struct lp_build_emit_data
*emit_data
)
228 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
229 LLVMValueRef v
= ac_to_integer(&ctx
->ac
, emit_data
->args
[0]);
230 emit_data
->output
[emit_data
->chan
] = LLVMBuildNot(ctx
->ac
.builder
, v
, "");
233 static void emit_arl(const struct lp_build_tgsi_action
*action
,
234 struct lp_build_tgsi_context
*bld_base
,
235 struct lp_build_emit_data
*emit_data
)
237 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
238 LLVMValueRef floor_index
=
239 ac_build_intrinsic(&ctx
->ac
, "llvm.floor.f32", ctx
->f32
,
240 &emit_data
->args
[0], 1, AC_FUNC_ATTR_READNONE
);
241 emit_data
->output
[emit_data
->chan
] = LLVMBuildFPToSI(ctx
->ac
.builder
,
242 floor_index
, ctx
->i32
, "");
245 static void emit_and(const struct lp_build_tgsi_action
*action
,
246 struct lp_build_tgsi_context
*bld_base
,
247 struct lp_build_emit_data
*emit_data
)
249 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
250 emit_data
->output
[emit_data
->chan
] = LLVMBuildAnd(ctx
->ac
.builder
,
251 emit_data
->args
[0], emit_data
->args
[1], "");
254 static void emit_or(const struct lp_build_tgsi_action
*action
,
255 struct lp_build_tgsi_context
*bld_base
,
256 struct lp_build_emit_data
*emit_data
)
258 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
259 emit_data
->output
[emit_data
->chan
] = LLVMBuildOr(ctx
->ac
.builder
,
260 emit_data
->args
[0], emit_data
->args
[1], "");
263 static void emit_uadd(const struct lp_build_tgsi_action
*action
,
264 struct lp_build_tgsi_context
*bld_base
,
265 struct lp_build_emit_data
*emit_data
)
267 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
268 emit_data
->output
[emit_data
->chan
] = LLVMBuildAdd(ctx
->ac
.builder
,
269 emit_data
->args
[0], emit_data
->args
[1], "");
272 static void emit_udiv(const struct lp_build_tgsi_action
*action
,
273 struct lp_build_tgsi_context
*bld_base
,
274 struct lp_build_emit_data
*emit_data
)
276 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
277 emit_data
->output
[emit_data
->chan
] = LLVMBuildUDiv(ctx
->ac
.builder
,
278 emit_data
->args
[0], emit_data
->args
[1], "");
281 static void emit_idiv(const struct lp_build_tgsi_action
*action
,
282 struct lp_build_tgsi_context
*bld_base
,
283 struct lp_build_emit_data
*emit_data
)
285 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
286 emit_data
->output
[emit_data
->chan
] = LLVMBuildSDiv(ctx
->ac
.builder
,
287 emit_data
->args
[0], emit_data
->args
[1], "");
290 static void emit_mod(const struct lp_build_tgsi_action
*action
,
291 struct lp_build_tgsi_context
*bld_base
,
292 struct lp_build_emit_data
*emit_data
)
294 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
295 emit_data
->output
[emit_data
->chan
] = LLVMBuildSRem(ctx
->ac
.builder
,
296 emit_data
->args
[0], emit_data
->args
[1], "");
299 static void emit_umod(const struct lp_build_tgsi_action
*action
,
300 struct lp_build_tgsi_context
*bld_base
,
301 struct lp_build_emit_data
*emit_data
)
303 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
304 emit_data
->output
[emit_data
->chan
] = LLVMBuildURem(ctx
->ac
.builder
,
305 emit_data
->args
[0], emit_data
->args
[1], "");
308 static void emit_shl(const struct lp_build_tgsi_action
*action
,
309 struct lp_build_tgsi_context
*bld_base
,
310 struct lp_build_emit_data
*emit_data
)
312 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
313 emit_data
->output
[emit_data
->chan
] = LLVMBuildShl(ctx
->ac
.builder
,
314 emit_data
->args
[0], emit_data
->args
[1], "");
317 static void emit_ushr(const struct lp_build_tgsi_action
*action
,
318 struct lp_build_tgsi_context
*bld_base
,
319 struct lp_build_emit_data
*emit_data
)
321 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
322 emit_data
->output
[emit_data
->chan
] = LLVMBuildLShr(ctx
->ac
.builder
,
323 emit_data
->args
[0], emit_data
->args
[1], "");
325 static void emit_ishr(const struct lp_build_tgsi_action
*action
,
326 struct lp_build_tgsi_context
*bld_base
,
327 struct lp_build_emit_data
*emit_data
)
329 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
330 emit_data
->output
[emit_data
->chan
] = LLVMBuildAShr(ctx
->ac
.builder
,
331 emit_data
->args
[0], emit_data
->args
[1], "");
334 static void emit_xor(const struct lp_build_tgsi_action
*action
,
335 struct lp_build_tgsi_context
*bld_base
,
336 struct lp_build_emit_data
*emit_data
)
338 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
339 emit_data
->output
[emit_data
->chan
] = LLVMBuildXor(ctx
->ac
.builder
,
340 emit_data
->args
[0], emit_data
->args
[1], "");
343 static void emit_ssg(const struct lp_build_tgsi_action
*action
,
344 struct lp_build_tgsi_context
*bld_base
,
345 struct lp_build_emit_data
*emit_data
)
347 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
351 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_I64SSG
) {
352 val
= ac_build_isign(&ctx
->ac
, emit_data
->args
[0], 64);
353 } else if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_ISSG
) {
354 val
= ac_build_isign(&ctx
->ac
, emit_data
->args
[0], 32);
355 } else if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_DSSG
) {
356 val
= ac_build_fsign(&ctx
->ac
, emit_data
->args
[0], 64);
358 val
= ac_build_fsign(&ctx
->ac
, emit_data
->args
[0], 32);
361 emit_data
->output
[emit_data
->chan
] = val
;
364 static void emit_ineg(const struct lp_build_tgsi_action
*action
,
365 struct lp_build_tgsi_context
*bld_base
,
366 struct lp_build_emit_data
*emit_data
)
368 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
369 emit_data
->output
[emit_data
->chan
] = LLVMBuildNeg(ctx
->ac
.builder
,
370 emit_data
->args
[0], "");
373 static void emit_dneg(const struct lp_build_tgsi_action
*action
,
374 struct lp_build_tgsi_context
*bld_base
,
375 struct lp_build_emit_data
*emit_data
)
377 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
378 emit_data
->output
[emit_data
->chan
] = LLVMBuildFNeg(ctx
->ac
.builder
,
379 emit_data
->args
[0], "");
382 static void emit_frac(const struct lp_build_tgsi_action
*action
,
383 struct lp_build_tgsi_context
*bld_base
,
384 struct lp_build_emit_data
*emit_data
)
386 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
389 if (emit_data
->info
->opcode
== TGSI_OPCODE_FRC
)
391 else if (emit_data
->info
->opcode
== TGSI_OPCODE_DFRAC
)
398 emit_data
->output
[emit_data
->chan
] =
399 ac_build_fract(&ctx
->ac
, emit_data
->args
[0], bitsize
);
402 static void emit_f2i(const struct lp_build_tgsi_action
*action
,
403 struct lp_build_tgsi_context
*bld_base
,
404 struct lp_build_emit_data
*emit_data
)
406 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
407 emit_data
->output
[emit_data
->chan
] = LLVMBuildFPToSI(ctx
->ac
.builder
,
408 emit_data
->args
[0], ctx
->i32
, "");
411 static void emit_f2u(const struct lp_build_tgsi_action
*action
,
412 struct lp_build_tgsi_context
*bld_base
,
413 struct lp_build_emit_data
*emit_data
)
415 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
416 emit_data
->output
[emit_data
->chan
] = LLVMBuildFPToUI(ctx
->ac
.builder
,
417 emit_data
->args
[0], ctx
->i32
, "");
420 static void emit_i2f(const struct lp_build_tgsi_action
*action
,
421 struct lp_build_tgsi_context
*bld_base
,
422 struct lp_build_emit_data
*emit_data
)
424 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
425 emit_data
->output
[emit_data
->chan
] = LLVMBuildSIToFP(ctx
->ac
.builder
,
426 emit_data
->args
[0], ctx
->f32
, "");
429 static void emit_u2f(const struct lp_build_tgsi_action
*action
,
430 struct lp_build_tgsi_context
*bld_base
,
431 struct lp_build_emit_data
*emit_data
)
433 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
434 emit_data
->output
[emit_data
->chan
] = LLVMBuildUIToFP(ctx
->ac
.builder
,
435 emit_data
->args
[0], ctx
->f32
, "");
439 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action
*action
,
440 struct lp_build_tgsi_context
*bld_base
,
441 struct lp_build_emit_data
*emit_data
)
443 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
444 emit_data
->output
[emit_data
->chan
] =
445 ac_build_intrinsic(&ctx
->ac
, action
->intr_name
,
446 emit_data
->dst_type
, emit_data
->args
,
447 emit_data
->arg_count
, AC_FUNC_ATTR_READNONE
);
450 static void emit_bfi(const struct lp_build_tgsi_action
*action
,
451 struct lp_build_tgsi_context
*bld_base
,
452 struct lp_build_emit_data
*emit_data
)
454 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
455 LLVMBuilderRef builder
= ctx
->ac
.builder
;
456 LLVMValueRef bfi_args
[3];
457 LLVMValueRef bfi_sm5
;
460 // Calculate the bitmask: (((1 << src3) - 1) << src2
461 bfi_args
[0] = LLVMBuildShl(builder
,
462 LLVMBuildSub(builder
,
463 LLVMBuildShl(builder
,
465 emit_data
->args
[3], ""),
467 emit_data
->args
[2], "");
469 bfi_args
[1] = LLVMBuildShl(builder
, emit_data
->args
[1],
470 emit_data
->args
[2], "");
472 bfi_args
[2] = emit_data
->args
[0];
475 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
476 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
479 LLVMBuildXor(builder
, bfi_args
[2],
480 LLVMBuildAnd(builder
, bfi_args
[0],
481 LLVMBuildXor(builder
, bfi_args
[1], bfi_args
[2],
484 /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
485 * uses the convenient V_BFI lowering for the above, which follows SM5
486 * and disagrees with GLSL semantics when bits (src3) is 32.
488 cond
= LLVMBuildICmp(builder
, LLVMIntUGE
, emit_data
->args
[3],
489 LLVMConstInt(ctx
->i32
, 32, 0), "");
490 emit_data
->output
[emit_data
->chan
] =
491 LLVMBuildSelect(builder
, cond
, emit_data
->args
[1], bfi_sm5
, "");
494 static void emit_bfe(const struct lp_build_tgsi_action
*action
,
495 struct lp_build_tgsi_context
*bld_base
,
496 struct lp_build_emit_data
*emit_data
)
498 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
500 /* FIXME: LLVM 7 returns incorrect result when count is 0.
501 * https://bugs.freedesktop.org/show_bug.cgi?id=107276
503 LLVMValueRef zero
= ctx
->i32_0
;
504 LLVMValueRef bfe_sm5
=
505 ac_build_bfe(&ctx
->ac
, emit_data
->args
[0],
506 emit_data
->args
[1], emit_data
->args
[2],
507 emit_data
->info
->opcode
== TGSI_OPCODE_IBFE
);
509 /* Correct for GLSL semantics. */
510 LLVMValueRef cond
= LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntUGE
, emit_data
->args
[2],
511 LLVMConstInt(ctx
->i32
, 32, 0), "");
512 LLVMValueRef cond2
= LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
, emit_data
->args
[2],
514 bfe_sm5
= LLVMBuildSelect(ctx
->ac
.builder
, cond
, emit_data
->args
[0], bfe_sm5
, "");
515 emit_data
->output
[emit_data
->chan
] =
516 LLVMBuildSelect(ctx
->ac
.builder
, cond2
, zero
, bfe_sm5
, "");
519 /* this is ffs in C */
520 static void emit_lsb(const struct lp_build_tgsi_action
*action
,
521 struct lp_build_tgsi_context
*bld_base
,
522 struct lp_build_emit_data
*emit_data
)
524 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
526 emit_data
->output
[emit_data
->chan
] = ac_find_lsb(&ctx
->ac
, emit_data
->dst_type
, emit_data
->args
[0]);
529 /* Find the last bit set. */
530 static void emit_umsb(const struct lp_build_tgsi_action
*action
,
531 struct lp_build_tgsi_context
*bld_base
,
532 struct lp_build_emit_data
*emit_data
)
534 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
536 emit_data
->output
[emit_data
->chan
] =
537 ac_build_umsb(&ctx
->ac
, emit_data
->args
[0], emit_data
->dst_type
);
540 /* Find the last bit opposite of the sign bit. */
541 static void emit_imsb(const struct lp_build_tgsi_action
*action
,
542 struct lp_build_tgsi_context
*bld_base
,
543 struct lp_build_emit_data
*emit_data
)
545 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
546 emit_data
->output
[emit_data
->chan
] =
547 ac_build_imsb(&ctx
->ac
, emit_data
->args
[0],
548 emit_data
->dst_type
);
551 static void emit_iabs(const struct lp_build_tgsi_action
*action
,
552 struct lp_build_tgsi_context
*bld_base
,
553 struct lp_build_emit_data
*emit_data
)
555 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
557 emit_data
->output
[emit_data
->chan
] =
558 ac_build_imax(&ctx
->ac
, emit_data
->args
[0],
559 LLVMBuildNeg(ctx
->ac
.builder
, emit_data
->args
[0], ""));
562 static void emit_minmax_int(const struct lp_build_tgsi_action
*action
,
563 struct lp_build_tgsi_context
*bld_base
,
564 struct lp_build_emit_data
*emit_data
)
566 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
569 switch (emit_data
->info
->opcode
) {
572 case TGSI_OPCODE_IMAX
:
573 case TGSI_OPCODE_I64MAX
:
576 case TGSI_OPCODE_IMIN
:
577 case TGSI_OPCODE_I64MIN
:
580 case TGSI_OPCODE_UMAX
:
581 case TGSI_OPCODE_U64MAX
:
584 case TGSI_OPCODE_UMIN
:
585 case TGSI_OPCODE_U64MIN
:
590 emit_data
->output
[emit_data
->chan
] =
591 LLVMBuildSelect(ctx
->ac
.builder
,
592 LLVMBuildICmp(ctx
->ac
.builder
, op
, emit_data
->args
[0],
593 emit_data
->args
[1], ""),
595 emit_data
->args
[1], "");
598 static void emit_pk2h(const struct lp_build_tgsi_action
*action
,
599 struct lp_build_tgsi_context
*bld_base
,
600 struct lp_build_emit_data
*emit_data
)
602 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
604 lp_build_emit_fetch(bld_base
, emit_data
->inst
, 0, TGSI_CHAN_X
),
605 lp_build_emit_fetch(bld_base
, emit_data
->inst
, 0, TGSI_CHAN_Y
),
609 /* From the GLSL 4.50 spec:
610 * "The rounding mode cannot be set and is undefined."
612 * v_cvt_pkrtz_f16 rounds to zero, but it's fastest.
614 emit_data
->output
[emit_data
->chan
] =
615 LLVMBuildBitCast(ctx
->ac
.builder
, ac_build_cvt_pkrtz_f16(&ctx
->ac
, v
),
619 static void emit_up2h(const struct lp_build_tgsi_action
*action
,
620 struct lp_build_tgsi_context
*bld_base
,
621 struct lp_build_emit_data
*emit_data
)
623 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
625 LLVMValueRef const16
, input
, val
;
628 i16
= LLVMInt16TypeInContext(ctx
->ac
.context
);
629 const16
= LLVMConstInt(ctx
->i32
, 16, 0);
630 input
= lp_build_emit_fetch(bld_base
, emit_data
->inst
, 0, TGSI_CHAN_X
);
632 for (i
= 0; i
< 2; i
++) {
633 val
= i
== 1 ? LLVMBuildLShr(ctx
->ac
.builder
, input
, const16
, "") : input
;
634 val
= LLVMBuildTrunc(ctx
->ac
.builder
, val
, i16
, "");
635 val
= ac_to_float(&ctx
->ac
, val
);
636 emit_data
->output
[i
] = LLVMBuildFPExt(ctx
->ac
.builder
, val
, ctx
->f32
, "");
640 static void emit_fdiv(const struct lp_build_tgsi_action
*action
,
641 struct lp_build_tgsi_context
*bld_base
,
642 struct lp_build_emit_data
*emit_data
)
644 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
646 emit_data
->output
[emit_data
->chan
] =
647 ac_build_fdiv(&ctx
->ac
, emit_data
->args
[0], emit_data
->args
[1]);
650 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
651 * the target machine. f64 needs global unsafe math flags to get rsq. */
652 static void emit_rsq(const struct lp_build_tgsi_action
*action
,
653 struct lp_build_tgsi_context
*bld_base
,
654 struct lp_build_emit_data
*emit_data
)
656 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
659 ac_build_intrinsic(&ctx
->ac
, "llvm.sqrt.f32", ctx
->f32
,
660 &emit_data
->args
[0], 1, AC_FUNC_ATTR_READNONE
);
662 emit_data
->output
[emit_data
->chan
] =
663 ac_build_fdiv(&ctx
->ac
, ctx
->ac
.f32_1
, sqrt
);
666 static void dfracexp_emit(const struct lp_build_tgsi_action
*action
,
667 struct lp_build_tgsi_context
*bld_base
,
668 struct lp_build_emit_data
*emit_data
)
670 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
671 LLVMValueRef in
= lp_build_emit_fetch(bld_base
, emit_data
->inst
, 0, TGSI_CHAN_X
);
673 emit_data
->output
[emit_data
->chan
] =
674 ac_build_intrinsic(&ctx
->ac
, "llvm.amdgcn.frexp.mant.f64",
675 ctx
->ac
.f64
, &in
, 1, 0);
676 emit_data
->output1
[emit_data
->chan
] =
677 ac_build_intrinsic(&ctx
->ac
, "llvm.amdgcn.frexp.exp.i32.f64",
678 ctx
->ac
.i32
, &in
, 1, 0);
681 void si_shader_context_init_alu(struct si_shader_context
*ctx
)
683 struct lp_build_tgsi_context
*bld_base
= &ctx
->bld_base
;
685 lp_set_default_actions(bld_base
);
687 bld_base
->op_actions
[TGSI_OPCODE_AND
].emit
= emit_and
;
688 bld_base
->op_actions
[TGSI_OPCODE_ARL
].emit
= emit_arl
;
689 bld_base
->op_actions
[TGSI_OPCODE_BFI
].emit
= emit_bfi
;
690 bld_base
->op_actions
[TGSI_OPCODE_BREV
].emit
= build_tgsi_intrinsic_nomem
;
691 bld_base
->op_actions
[TGSI_OPCODE_BREV
].intr_name
= "llvm.bitreverse.i32";
692 bld_base
->op_actions
[TGSI_OPCODE_CEIL
].emit
= build_tgsi_intrinsic_nomem
;
693 bld_base
->op_actions
[TGSI_OPCODE_CEIL
].intr_name
= "llvm.ceil.f32";
694 bld_base
->op_actions
[TGSI_OPCODE_CMP
].emit
= emit_cmp
;
695 bld_base
->op_actions
[TGSI_OPCODE_COS
].emit
= build_tgsi_intrinsic_nomem
;
696 bld_base
->op_actions
[TGSI_OPCODE_COS
].intr_name
= "llvm.cos.f32";
697 bld_base
->op_actions
[TGSI_OPCODE_DABS
].emit
= build_tgsi_intrinsic_nomem
;
698 bld_base
->op_actions
[TGSI_OPCODE_DABS
].intr_name
= "llvm.fabs.f64";
699 bld_base
->op_actions
[TGSI_OPCODE_DCEIL
].emit
= build_tgsi_intrinsic_nomem
;
700 bld_base
->op_actions
[TGSI_OPCODE_DCEIL
].intr_name
= "llvm.ceil.f64";
701 bld_base
->op_actions
[TGSI_OPCODE_DFLR
].emit
= build_tgsi_intrinsic_nomem
;
702 bld_base
->op_actions
[TGSI_OPCODE_DFLR
].intr_name
= "llvm.floor.f64";
703 bld_base
->op_actions
[TGSI_OPCODE_DFMA
].emit
= build_tgsi_intrinsic_nomem
;
704 bld_base
->op_actions
[TGSI_OPCODE_DFMA
].intr_name
= "llvm.fma.f64";
705 bld_base
->op_actions
[TGSI_OPCODE_DFRAC
].emit
= emit_frac
;
706 bld_base
->op_actions
[TGSI_OPCODE_DIV
].emit
= emit_fdiv
;
707 bld_base
->op_actions
[TGSI_OPCODE_DNEG
].emit
= emit_dneg
;
708 bld_base
->op_actions
[TGSI_OPCODE_DROUND
].emit
= build_tgsi_intrinsic_nomem
;
709 bld_base
->op_actions
[TGSI_OPCODE_DROUND
].intr_name
= "llvm.rint.f64";
710 bld_base
->op_actions
[TGSI_OPCODE_DSEQ
].emit
= emit_dcmp
;
711 bld_base
->op_actions
[TGSI_OPCODE_DSGE
].emit
= emit_dcmp
;
712 bld_base
->op_actions
[TGSI_OPCODE_DSLT
].emit
= emit_dcmp
;
713 bld_base
->op_actions
[TGSI_OPCODE_DSNE
].emit
= emit_dcmp
;
714 bld_base
->op_actions
[TGSI_OPCODE_DSSG
].emit
= emit_ssg
;
715 bld_base
->op_actions
[TGSI_OPCODE_DRSQ
].emit
= build_tgsi_intrinsic_nomem
;
716 bld_base
->op_actions
[TGSI_OPCODE_DRSQ
].intr_name
= "llvm.amdgcn.rsq.f64";
717 bld_base
->op_actions
[TGSI_OPCODE_DSQRT
].emit
= build_tgsi_intrinsic_nomem
;
718 bld_base
->op_actions
[TGSI_OPCODE_DSQRT
].intr_name
= "llvm.sqrt.f64";
719 bld_base
->op_actions
[TGSI_OPCODE_DTRUNC
].emit
= build_tgsi_intrinsic_nomem
;
720 bld_base
->op_actions
[TGSI_OPCODE_DTRUNC
].intr_name
= "llvm.trunc.f64";
721 bld_base
->op_actions
[TGSI_OPCODE_DFRACEXP
].emit
= dfracexp_emit
;
722 bld_base
->op_actions
[TGSI_OPCODE_DLDEXP
].emit
= build_tgsi_intrinsic_nomem
;
723 bld_base
->op_actions
[TGSI_OPCODE_DLDEXP
].intr_name
= "llvm.amdgcn.ldexp.f64";
724 bld_base
->op_actions
[TGSI_OPCODE_EX2
].emit
= build_tgsi_intrinsic_nomem
;
725 bld_base
->op_actions
[TGSI_OPCODE_EX2
].intr_name
= "llvm.exp2.f32";
726 bld_base
->op_actions
[TGSI_OPCODE_FLR
].emit
= build_tgsi_intrinsic_nomem
;
727 bld_base
->op_actions
[TGSI_OPCODE_FLR
].intr_name
= "llvm.floor.f32";
729 /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
730 if (ctx
->screen
->info
.chip_class
>= GFX10
) {
731 bld_base
->op_actions
[TGSI_OPCODE_FMA
].emit
= build_tgsi_intrinsic_nomem
;
732 bld_base
->op_actions
[TGSI_OPCODE_FMA
].intr_name
= "llvm.fma.f32";
734 bld_base
->op_actions
[TGSI_OPCODE_FMA
].emit
=
735 bld_base
->op_actions
[TGSI_OPCODE_MAD
].emit
;
738 bld_base
->op_actions
[TGSI_OPCODE_FRC
].emit
= emit_frac
;
739 bld_base
->op_actions
[TGSI_OPCODE_F2I
].emit
= emit_f2i
;
740 bld_base
->op_actions
[TGSI_OPCODE_F2U
].emit
= emit_f2u
;
741 bld_base
->op_actions
[TGSI_OPCODE_FSEQ
].emit
= emit_fcmp
;
742 bld_base
->op_actions
[TGSI_OPCODE_FSGE
].emit
= emit_fcmp
;
743 bld_base
->op_actions
[TGSI_OPCODE_FSLT
].emit
= emit_fcmp
;
744 bld_base
->op_actions
[TGSI_OPCODE_FSNE
].emit
= emit_fcmp
;
745 bld_base
->op_actions
[TGSI_OPCODE_IABS
].emit
= emit_iabs
;
746 bld_base
->op_actions
[TGSI_OPCODE_IBFE
].emit
= emit_bfe
;
747 bld_base
->op_actions
[TGSI_OPCODE_IDIV
].emit
= emit_idiv
;
748 bld_base
->op_actions
[TGSI_OPCODE_IMAX
].emit
= emit_minmax_int
;
749 bld_base
->op_actions
[TGSI_OPCODE_IMIN
].emit
= emit_minmax_int
;
750 bld_base
->op_actions
[TGSI_OPCODE_IMSB
].emit
= emit_imsb
;
751 bld_base
->op_actions
[TGSI_OPCODE_INEG
].emit
= emit_ineg
;
752 bld_base
->op_actions
[TGSI_OPCODE_ISHR
].emit
= emit_ishr
;
753 bld_base
->op_actions
[TGSI_OPCODE_ISGE
].emit
= emit_icmp
;
754 bld_base
->op_actions
[TGSI_OPCODE_ISLT
].emit
= emit_icmp
;
755 bld_base
->op_actions
[TGSI_OPCODE_ISSG
].emit
= emit_ssg
;
756 bld_base
->op_actions
[TGSI_OPCODE_I2F
].emit
= emit_i2f
;
757 bld_base
->op_actions
[TGSI_OPCODE_KILL_IF
].emit
= kil_emit
;
758 bld_base
->op_actions
[TGSI_OPCODE_KILL
].emit
= kil_emit
;
759 bld_base
->op_actions
[TGSI_OPCODE_LDEXP
].emit
= build_tgsi_intrinsic_nomem
;
760 bld_base
->op_actions
[TGSI_OPCODE_LDEXP
].intr_name
= "llvm.amdgcn.ldexp.f32";
761 bld_base
->op_actions
[TGSI_OPCODE_LSB
].emit
= emit_lsb
;
762 bld_base
->op_actions
[TGSI_OPCODE_LG2
].emit
= build_tgsi_intrinsic_nomem
;
763 bld_base
->op_actions
[TGSI_OPCODE_LG2
].intr_name
= "llvm.log2.f32";
764 bld_base
->op_actions
[TGSI_OPCODE_MAX
].emit
= build_tgsi_intrinsic_nomem
;
765 bld_base
->op_actions
[TGSI_OPCODE_MAX
].intr_name
= "llvm.maxnum.f32";
766 bld_base
->op_actions
[TGSI_OPCODE_MIN
].emit
= build_tgsi_intrinsic_nomem
;
767 bld_base
->op_actions
[TGSI_OPCODE_MIN
].intr_name
= "llvm.minnum.f32";
768 bld_base
->op_actions
[TGSI_OPCODE_MOD
].emit
= emit_mod
;
769 bld_base
->op_actions
[TGSI_OPCODE_UMSB
].emit
= emit_umsb
;
770 bld_base
->op_actions
[TGSI_OPCODE_NOT
].emit
= emit_not
;
771 bld_base
->op_actions
[TGSI_OPCODE_OR
].emit
= emit_or
;
772 bld_base
->op_actions
[TGSI_OPCODE_PK2H
].emit
= emit_pk2h
;
773 bld_base
->op_actions
[TGSI_OPCODE_POPC
].emit
= build_tgsi_intrinsic_nomem
;
774 bld_base
->op_actions
[TGSI_OPCODE_POPC
].intr_name
= "llvm.ctpop.i32";
775 bld_base
->op_actions
[TGSI_OPCODE_POW
].emit
= build_tgsi_intrinsic_nomem
;
776 bld_base
->op_actions
[TGSI_OPCODE_POW
].intr_name
= "llvm.pow.f32";
777 bld_base
->op_actions
[TGSI_OPCODE_ROUND
].emit
= build_tgsi_intrinsic_nomem
;
778 bld_base
->op_actions
[TGSI_OPCODE_ROUND
].intr_name
= "llvm.rint.f32";
779 bld_base
->op_actions
[TGSI_OPCODE_RSQ
].emit
= emit_rsq
;
780 bld_base
->op_actions
[TGSI_OPCODE_SGE
].emit
= emit_set_cond
;
781 bld_base
->op_actions
[TGSI_OPCODE_SEQ
].emit
= emit_set_cond
;
782 bld_base
->op_actions
[TGSI_OPCODE_SHL
].emit
= emit_shl
;
783 bld_base
->op_actions
[TGSI_OPCODE_SLE
].emit
= emit_set_cond
;
784 bld_base
->op_actions
[TGSI_OPCODE_SLT
].emit
= emit_set_cond
;
785 bld_base
->op_actions
[TGSI_OPCODE_SNE
].emit
= emit_set_cond
;
786 bld_base
->op_actions
[TGSI_OPCODE_SGT
].emit
= emit_set_cond
;
787 bld_base
->op_actions
[TGSI_OPCODE_SIN
].emit
= build_tgsi_intrinsic_nomem
;
788 bld_base
->op_actions
[TGSI_OPCODE_SIN
].intr_name
= "llvm.sin.f32";
789 bld_base
->op_actions
[TGSI_OPCODE_SQRT
].emit
= build_tgsi_intrinsic_nomem
;
790 bld_base
->op_actions
[TGSI_OPCODE_SQRT
].intr_name
= "llvm.sqrt.f32";
791 bld_base
->op_actions
[TGSI_OPCODE_SSG
].emit
= emit_ssg
;
792 bld_base
->op_actions
[TGSI_OPCODE_TRUNC
].emit
= build_tgsi_intrinsic_nomem
;
793 bld_base
->op_actions
[TGSI_OPCODE_TRUNC
].intr_name
= "llvm.trunc.f32";
794 bld_base
->op_actions
[TGSI_OPCODE_UADD
].emit
= emit_uadd
;
795 bld_base
->op_actions
[TGSI_OPCODE_UBFE
].emit
= emit_bfe
;
796 bld_base
->op_actions
[TGSI_OPCODE_UDIV
].emit
= emit_udiv
;
797 bld_base
->op_actions
[TGSI_OPCODE_UMAX
].emit
= emit_minmax_int
;
798 bld_base
->op_actions
[TGSI_OPCODE_UMIN
].emit
= emit_minmax_int
;
799 bld_base
->op_actions
[TGSI_OPCODE_UMOD
].emit
= emit_umod
;
800 bld_base
->op_actions
[TGSI_OPCODE_USEQ
].emit
= emit_icmp
;
801 bld_base
->op_actions
[TGSI_OPCODE_USGE
].emit
= emit_icmp
;
802 bld_base
->op_actions
[TGSI_OPCODE_USHR
].emit
= emit_ushr
;
803 bld_base
->op_actions
[TGSI_OPCODE_USLT
].emit
= emit_icmp
;
804 bld_base
->op_actions
[TGSI_OPCODE_USNE
].emit
= emit_icmp
;
805 bld_base
->op_actions
[TGSI_OPCODE_U2F
].emit
= emit_u2f
;
806 bld_base
->op_actions
[TGSI_OPCODE_XOR
].emit
= emit_xor
;
807 bld_base
->op_actions
[TGSI_OPCODE_UCMP
].emit
= emit_ucmp
;
808 bld_base
->op_actions
[TGSI_OPCODE_UP2H
].emit
= emit_up2h
;
810 bld_base
->op_actions
[TGSI_OPCODE_I64MAX
].emit
= emit_minmax_int
;
811 bld_base
->op_actions
[TGSI_OPCODE_I64MIN
].emit
= emit_minmax_int
;
812 bld_base
->op_actions
[TGSI_OPCODE_U64MAX
].emit
= emit_minmax_int
;
813 bld_base
->op_actions
[TGSI_OPCODE_U64MIN
].emit
= emit_minmax_int
;
814 bld_base
->op_actions
[TGSI_OPCODE_I64ABS
].emit
= emit_iabs
;
815 bld_base
->op_actions
[TGSI_OPCODE_I64SSG
].emit
= emit_ssg
;
816 bld_base
->op_actions
[TGSI_OPCODE_I64NEG
].emit
= emit_ineg
;
818 bld_base
->op_actions
[TGSI_OPCODE_U64SEQ
].emit
= emit_icmp
;
819 bld_base
->op_actions
[TGSI_OPCODE_U64SNE
].emit
= emit_icmp
;
820 bld_base
->op_actions
[TGSI_OPCODE_U64SGE
].emit
= emit_icmp
;
821 bld_base
->op_actions
[TGSI_OPCODE_U64SLT
].emit
= emit_icmp
;
822 bld_base
->op_actions
[TGSI_OPCODE_I64SGE
].emit
= emit_icmp
;
823 bld_base
->op_actions
[TGSI_OPCODE_I64SLT
].emit
= emit_icmp
;
825 bld_base
->op_actions
[TGSI_OPCODE_U64ADD
].emit
= emit_uadd
;
826 bld_base
->op_actions
[TGSI_OPCODE_U64SHL
].emit
= emit_shl
;
827 bld_base
->op_actions
[TGSI_OPCODE_U64SHR
].emit
= emit_ushr
;
828 bld_base
->op_actions
[TGSI_OPCODE_I64SHR
].emit
= emit_ishr
;
830 bld_base
->op_actions
[TGSI_OPCODE_U64MOD
].emit
= emit_umod
;
831 bld_base
->op_actions
[TGSI_OPCODE_I64MOD
].emit
= emit_mod
;
832 bld_base
->op_actions
[TGSI_OPCODE_U64DIV
].emit
= emit_udiv
;
833 bld_base
->op_actions
[TGSI_OPCODE_I64DIV
].emit
= emit_idiv
;