radeonsi: Save CLEAR_STATE initial values for optimization
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_tgsi_alu.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "si_shader_internal.h"
26 #include "ac_llvm_util.h"
27
28 static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
29 struct lp_build_emit_data *emit_data)
30 {
31 const struct tgsi_full_instruction *inst = emit_data->inst;
32 struct si_shader_context *ctx = si_shader_context(bld_base);
33 LLVMBuilderRef builder = ctx->ac.builder;
34 unsigned i;
35 LLVMValueRef conds[TGSI_NUM_CHANNELS];
36
37 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
38 LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
39 conds[i] = LLVMBuildFCmp(builder, LLVMRealOGE, value,
40 ctx->ac.f32_0, "");
41 }
42
43 /* And the conditions together */
44 for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
45 conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], "");
46 }
47
48 emit_data->dst_type = ctx->voidt;
49 emit_data->arg_count = 1;
50 emit_data->args[0] = conds[0];
51 }
52
53 void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
54 {
55 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
56 LLVMBuilderRef builder = ctx->ac.builder;
57
58 if (ctx->shader->selector->force_correct_derivs_after_kill) {
59 /* LLVM 6.0 can kill immediately while maintaining WQM. */
60 if (HAVE_LLVM >= 0x0600) {
61 ac_build_kill_if_false(&ctx->ac,
62 ac_build_wqm_vote(&ctx->ac, visible));
63 }
64
65 LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
66 mask = LLVMBuildAnd(builder, mask, visible, "");
67 LLVMBuildStore(builder, mask, ctx->postponed_kill);
68 return;
69 }
70
71 ac_build_kill_if_false(&ctx->ac, visible);
72 }
73
74 static void kil_emit(const struct lp_build_tgsi_action *action,
75 struct lp_build_tgsi_context *bld_base,
76 struct lp_build_emit_data *emit_data)
77 {
78 struct si_shader_context *ctx = si_shader_context(bld_base);
79 LLVMValueRef visible;
80
81 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
82 visible = emit_data->args[0];
83 } else {
84 assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
85 visible = LLVMConstInt(ctx->i1, false, 0);
86 }
87
88 si_llvm_emit_kill(&ctx->abi, visible);
89 }
90
91 static void emit_icmp(const struct lp_build_tgsi_action *action,
92 struct lp_build_tgsi_context *bld_base,
93 struct lp_build_emit_data *emit_data)
94 {
95 unsigned pred;
96 struct si_shader_context *ctx = si_shader_context(bld_base);
97
98 switch (emit_data->inst->Instruction.Opcode) {
99 case TGSI_OPCODE_USEQ:
100 case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
101 case TGSI_OPCODE_USNE:
102 case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
103 case TGSI_OPCODE_USGE:
104 case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
105 case TGSI_OPCODE_USLT:
106 case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
107 case TGSI_OPCODE_ISGE:
108 case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
109 case TGSI_OPCODE_ISLT:
110 case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
111 default:
112 assert(!"unknown instruction");
113 pred = 0;
114 break;
115 }
116
117 LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, pred,
118 emit_data->args[0], emit_data->args[1],"");
119
120 v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
121
122 emit_data->output[emit_data->chan] = v;
123 }
124
125 static void emit_ucmp(const struct lp_build_tgsi_action *action,
126 struct lp_build_tgsi_context *bld_base,
127 struct lp_build_emit_data *emit_data)
128 {
129 struct si_shader_context *ctx = si_shader_context(bld_base);
130 LLVMValueRef arg0 = ac_to_integer(&ctx->ac, emit_data->args[0]);
131
132 LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, arg0,
133 ctx->i32_0, "");
134
135 emit_data->output[emit_data->chan] =
136 LLVMBuildSelect(ctx->ac.builder, v, emit_data->args[1], emit_data->args[2], "");
137 }
138
139 static void emit_cmp(const struct lp_build_tgsi_action *action,
140 struct lp_build_tgsi_context *bld_base,
141 struct lp_build_emit_data *emit_data)
142 {
143 struct si_shader_context *ctx = si_shader_context(bld_base);
144 LLVMValueRef cond, *args = emit_data->args;
145
146 cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealOLT, args[0],
147 ctx->ac.f32_0, "");
148
149 emit_data->output[emit_data->chan] =
150 LLVMBuildSelect(ctx->ac.builder, cond, args[1], args[2], "");
151 }
152
153 static void emit_set_cond(const struct lp_build_tgsi_action *action,
154 struct lp_build_tgsi_context *bld_base,
155 struct lp_build_emit_data *emit_data)
156 {
157 struct si_shader_context *ctx = si_shader_context(bld_base);
158 LLVMRealPredicate pred;
159 LLVMValueRef cond;
160
161 /* Use ordered for everything but NE (which is usual for
162 * float comparisons)
163 */
164 switch (emit_data->inst->Instruction.Opcode) {
165 case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
166 case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
167 case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
168 case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
169 case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
170 case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
171 default: assert(!"unknown instruction"); pred = 0; break;
172 }
173
174 cond = LLVMBuildFCmp(ctx->ac.builder,
175 pred, emit_data->args[0], emit_data->args[1], "");
176
177 emit_data->output[emit_data->chan] = LLVMBuildSelect(ctx->ac.builder,
178 cond, ctx->ac.f32_1, ctx->ac.f32_0, "");
179 }
180
181 static void emit_fcmp(const struct lp_build_tgsi_action *action,
182 struct lp_build_tgsi_context *bld_base,
183 struct lp_build_emit_data *emit_data)
184 {
185 struct si_shader_context *ctx = si_shader_context(bld_base);
186 LLVMRealPredicate pred;
187
188 /* Use ordered for everything but NE (which is usual for
189 * float comparisons)
190 */
191 switch (emit_data->inst->Instruction.Opcode) {
192 case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
193 case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
194 case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
195 case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
196 default: assert(!"unknown instruction"); pred = 0; break;
197 }
198
199 LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
200 emit_data->args[0], emit_data->args[1],"");
201
202 v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
203
204 emit_data->output[emit_data->chan] = v;
205 }
206
207 static void emit_dcmp(const struct lp_build_tgsi_action *action,
208 struct lp_build_tgsi_context *bld_base,
209 struct lp_build_emit_data *emit_data)
210 {
211 struct si_shader_context *ctx = si_shader_context(bld_base);
212 LLVMRealPredicate pred;
213
214 /* Use ordered for everything but NE (which is usual for
215 * float comparisons)
216 */
217 switch (emit_data->inst->Instruction.Opcode) {
218 case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
219 case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
220 case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
221 case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
222 default: assert(!"unknown instruction"); pred = 0; break;
223 }
224
225 LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
226 emit_data->args[0], emit_data->args[1],"");
227
228 v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
229
230 emit_data->output[emit_data->chan] = v;
231 }
232
233 static void emit_not(const struct lp_build_tgsi_action *action,
234 struct lp_build_tgsi_context *bld_base,
235 struct lp_build_emit_data *emit_data)
236 {
237 struct si_shader_context *ctx = si_shader_context(bld_base);
238 LLVMValueRef v = ac_to_integer(&ctx->ac, emit_data->args[0]);
239 emit_data->output[emit_data->chan] = LLVMBuildNot(ctx->ac.builder, v, "");
240 }
241
242 static void emit_arl(const struct lp_build_tgsi_action *action,
243 struct lp_build_tgsi_context *bld_base,
244 struct lp_build_emit_data *emit_data)
245 {
246 struct si_shader_context *ctx = si_shader_context(bld_base);
247 LLVMValueRef floor_index =
248 ac_build_intrinsic(&ctx->ac, "llvm.floor.f32", ctx->f32,
249 &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE);
250 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
251 floor_index, ctx->i32, "");
252 }
253
254 static void emit_and(const struct lp_build_tgsi_action *action,
255 struct lp_build_tgsi_context *bld_base,
256 struct lp_build_emit_data *emit_data)
257 {
258 struct si_shader_context *ctx = si_shader_context(bld_base);
259 emit_data->output[emit_data->chan] = LLVMBuildAnd(ctx->ac.builder,
260 emit_data->args[0], emit_data->args[1], "");
261 }
262
263 static void emit_or(const struct lp_build_tgsi_action *action,
264 struct lp_build_tgsi_context *bld_base,
265 struct lp_build_emit_data *emit_data)
266 {
267 struct si_shader_context *ctx = si_shader_context(bld_base);
268 emit_data->output[emit_data->chan] = LLVMBuildOr(ctx->ac.builder,
269 emit_data->args[0], emit_data->args[1], "");
270 }
271
272 static void emit_uadd(const struct lp_build_tgsi_action *action,
273 struct lp_build_tgsi_context *bld_base,
274 struct lp_build_emit_data *emit_data)
275 {
276 struct si_shader_context *ctx = si_shader_context(bld_base);
277 emit_data->output[emit_data->chan] = LLVMBuildAdd(ctx->ac.builder,
278 emit_data->args[0], emit_data->args[1], "");
279 }
280
281 static void emit_udiv(const struct lp_build_tgsi_action *action,
282 struct lp_build_tgsi_context *bld_base,
283 struct lp_build_emit_data *emit_data)
284 {
285 struct si_shader_context *ctx = si_shader_context(bld_base);
286 emit_data->output[emit_data->chan] = LLVMBuildUDiv(ctx->ac.builder,
287 emit_data->args[0], emit_data->args[1], "");
288 }
289
290 static void emit_idiv(const struct lp_build_tgsi_action *action,
291 struct lp_build_tgsi_context *bld_base,
292 struct lp_build_emit_data *emit_data)
293 {
294 struct si_shader_context *ctx = si_shader_context(bld_base);
295 emit_data->output[emit_data->chan] = LLVMBuildSDiv(ctx->ac.builder,
296 emit_data->args[0], emit_data->args[1], "");
297 }
298
299 static void emit_mod(const struct lp_build_tgsi_action *action,
300 struct lp_build_tgsi_context *bld_base,
301 struct lp_build_emit_data *emit_data)
302 {
303 struct si_shader_context *ctx = si_shader_context(bld_base);
304 emit_data->output[emit_data->chan] = LLVMBuildSRem(ctx->ac.builder,
305 emit_data->args[0], emit_data->args[1], "");
306 }
307
308 static void emit_umod(const struct lp_build_tgsi_action *action,
309 struct lp_build_tgsi_context *bld_base,
310 struct lp_build_emit_data *emit_data)
311 {
312 struct si_shader_context *ctx = si_shader_context(bld_base);
313 emit_data->output[emit_data->chan] = LLVMBuildURem(ctx->ac.builder,
314 emit_data->args[0], emit_data->args[1], "");
315 }
316
317 static void emit_shl(const struct lp_build_tgsi_action *action,
318 struct lp_build_tgsi_context *bld_base,
319 struct lp_build_emit_data *emit_data)
320 {
321 struct si_shader_context *ctx = si_shader_context(bld_base);
322 emit_data->output[emit_data->chan] = LLVMBuildShl(ctx->ac.builder,
323 emit_data->args[0], emit_data->args[1], "");
324 }
325
326 static void emit_ushr(const struct lp_build_tgsi_action *action,
327 struct lp_build_tgsi_context *bld_base,
328 struct lp_build_emit_data *emit_data)
329 {
330 struct si_shader_context *ctx = si_shader_context(bld_base);
331 emit_data->output[emit_data->chan] = LLVMBuildLShr(ctx->ac.builder,
332 emit_data->args[0], emit_data->args[1], "");
333 }
334 static void emit_ishr(const struct lp_build_tgsi_action *action,
335 struct lp_build_tgsi_context *bld_base,
336 struct lp_build_emit_data *emit_data)
337 {
338 struct si_shader_context *ctx = si_shader_context(bld_base);
339 emit_data->output[emit_data->chan] = LLVMBuildAShr(ctx->ac.builder,
340 emit_data->args[0], emit_data->args[1], "");
341 }
342
343 static void emit_xor(const struct lp_build_tgsi_action *action,
344 struct lp_build_tgsi_context *bld_base,
345 struct lp_build_emit_data *emit_data)
346 {
347 struct si_shader_context *ctx = si_shader_context(bld_base);
348 emit_data->output[emit_data->chan] = LLVMBuildXor(ctx->ac.builder,
349 emit_data->args[0], emit_data->args[1], "");
350 }
351
352 static void emit_ssg(const struct lp_build_tgsi_action *action,
353 struct lp_build_tgsi_context *bld_base,
354 struct lp_build_emit_data *emit_data)
355 {
356 struct si_shader_context *ctx = si_shader_context(bld_base);
357
358 LLVMValueRef val;
359
360 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
361 val = ac_build_isign(&ctx->ac, emit_data->args[0], 64);
362 } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
363 val = ac_build_isign(&ctx->ac, emit_data->args[0], 32);
364 } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_DSSG) {
365 val = ac_build_fsign(&ctx->ac, emit_data->args[0], 64);
366 } else {
367 val = ac_build_fsign(&ctx->ac, emit_data->args[0], 32);
368 }
369
370 emit_data->output[emit_data->chan] = val;
371 }
372
373 static void emit_ineg(const struct lp_build_tgsi_action *action,
374 struct lp_build_tgsi_context *bld_base,
375 struct lp_build_emit_data *emit_data)
376 {
377 struct si_shader_context *ctx = si_shader_context(bld_base);
378 emit_data->output[emit_data->chan] = LLVMBuildNeg(ctx->ac.builder,
379 emit_data->args[0], "");
380 }
381
382 static void emit_dneg(const struct lp_build_tgsi_action *action,
383 struct lp_build_tgsi_context *bld_base,
384 struct lp_build_emit_data *emit_data)
385 {
386 struct si_shader_context *ctx = si_shader_context(bld_base);
387 emit_data->output[emit_data->chan] = LLVMBuildFNeg(ctx->ac.builder,
388 emit_data->args[0], "");
389 }
390
391 static void emit_frac(const struct lp_build_tgsi_action *action,
392 struct lp_build_tgsi_context *bld_base,
393 struct lp_build_emit_data *emit_data)
394 {
395 struct si_shader_context *ctx = si_shader_context(bld_base);
396 unsigned bitsize;
397
398 if (emit_data->info->opcode == TGSI_OPCODE_FRC)
399 bitsize = 32;
400 else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
401 bitsize = 64;
402 else {
403 assert(0);
404 return;
405 }
406
407 emit_data->output[emit_data->chan] =
408 ac_build_fract(&ctx->ac, emit_data->args[0], bitsize);
409 }
410
411 static void emit_f2i(const struct lp_build_tgsi_action *action,
412 struct lp_build_tgsi_context *bld_base,
413 struct lp_build_emit_data *emit_data)
414 {
415 struct si_shader_context *ctx = si_shader_context(bld_base);
416 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
417 emit_data->args[0], ctx->i32, "");
418 }
419
420 static void emit_f2u(const struct lp_build_tgsi_action *action,
421 struct lp_build_tgsi_context *bld_base,
422 struct lp_build_emit_data *emit_data)
423 {
424 struct si_shader_context *ctx = si_shader_context(bld_base);
425 emit_data->output[emit_data->chan] = LLVMBuildFPToUI(ctx->ac.builder,
426 emit_data->args[0], ctx->i32, "");
427 }
428
429 static void emit_i2f(const struct lp_build_tgsi_action *action,
430 struct lp_build_tgsi_context *bld_base,
431 struct lp_build_emit_data *emit_data)
432 {
433 struct si_shader_context *ctx = si_shader_context(bld_base);
434 emit_data->output[emit_data->chan] = LLVMBuildSIToFP(ctx->ac.builder,
435 emit_data->args[0], ctx->f32, "");
436 }
437
438 static void emit_u2f(const struct lp_build_tgsi_action *action,
439 struct lp_build_tgsi_context *bld_base,
440 struct lp_build_emit_data *emit_data)
441 {
442 struct si_shader_context *ctx = si_shader_context(bld_base);
443 emit_data->output[emit_data->chan] = LLVMBuildUIToFP(ctx->ac.builder,
444 emit_data->args[0], ctx->f32, "");
445 }
446
447 static void
448 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
449 struct lp_build_tgsi_context *bld_base,
450 struct lp_build_emit_data *emit_data)
451 {
452 struct si_shader_context *ctx = si_shader_context(bld_base);
453 emit_data->output[emit_data->chan] =
454 ac_build_intrinsic(&ctx->ac, action->intr_name,
455 emit_data->dst_type, emit_data->args,
456 emit_data->arg_count, AC_FUNC_ATTR_READNONE);
457 }
458
459 static void emit_bfi(const struct lp_build_tgsi_action *action,
460 struct lp_build_tgsi_context *bld_base,
461 struct lp_build_emit_data *emit_data)
462 {
463 struct si_shader_context *ctx = si_shader_context(bld_base);
464 LLVMBuilderRef builder = ctx->ac.builder;
465 LLVMValueRef bfi_args[3];
466 LLVMValueRef bfi_sm5;
467 LLVMValueRef cond;
468
469 // Calculate the bitmask: (((1 << src3) - 1) << src2
470 bfi_args[0] = LLVMBuildShl(builder,
471 LLVMBuildSub(builder,
472 LLVMBuildShl(builder,
473 ctx->i32_1,
474 emit_data->args[3], ""),
475 ctx->i32_1, ""),
476 emit_data->args[2], "");
477
478 bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
479 emit_data->args[2], "");
480
481 bfi_args[2] = emit_data->args[0];
482
483 /* Calculate:
484 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
485 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
486 */
487 bfi_sm5 =
488 LLVMBuildXor(builder, bfi_args[2],
489 LLVMBuildAnd(builder, bfi_args[0],
490 LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
491 ""), ""), "");
492
493 /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
494 * uses the convenient V_BFI lowering for the above, which follows SM5
495 * and disagrees with GLSL semantics when bits (src3) is 32.
496 */
497 cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
498 LLVMConstInt(ctx->i32, 32, 0), "");
499 emit_data->output[emit_data->chan] =
500 LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
501 }
502
503 static void emit_bfe(const struct lp_build_tgsi_action *action,
504 struct lp_build_tgsi_context *bld_base,
505 struct lp_build_emit_data *emit_data)
506 {
507 struct si_shader_context *ctx = si_shader_context(bld_base);
508 LLVMValueRef bfe_sm5;
509 LLVMValueRef cond;
510
511 bfe_sm5 = ac_build_bfe(&ctx->ac, emit_data->args[0],
512 emit_data->args[1], emit_data->args[2],
513 emit_data->info->opcode == TGSI_OPCODE_IBFE);
514
515 /* Correct for GLSL semantics. */
516 cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
517 LLVMConstInt(ctx->i32, 32, 0), "");
518 emit_data->output[emit_data->chan] =
519 LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
520 }
521
522 /* this is ffs in C */
523 static void emit_lsb(const struct lp_build_tgsi_action *action,
524 struct lp_build_tgsi_context *bld_base,
525 struct lp_build_emit_data *emit_data)
526 {
527 struct si_shader_context *ctx = si_shader_context(bld_base);
528
529 emit_data->output[emit_data->chan] = ac_find_lsb(&ctx->ac, emit_data->dst_type, emit_data->args[0]);
530 }
531
532 /* Find the last bit set. */
533 static void emit_umsb(const struct lp_build_tgsi_action *action,
534 struct lp_build_tgsi_context *bld_base,
535 struct lp_build_emit_data *emit_data)
536 {
537 struct si_shader_context *ctx = si_shader_context(bld_base);
538
539 emit_data->output[emit_data->chan] =
540 ac_build_umsb(&ctx->ac, emit_data->args[0], emit_data->dst_type);
541 }
542
543 /* Find the last bit opposite of the sign bit. */
544 static void emit_imsb(const struct lp_build_tgsi_action *action,
545 struct lp_build_tgsi_context *bld_base,
546 struct lp_build_emit_data *emit_data)
547 {
548 struct si_shader_context *ctx = si_shader_context(bld_base);
549 emit_data->output[emit_data->chan] =
550 ac_build_imsb(&ctx->ac, emit_data->args[0],
551 emit_data->dst_type);
552 }
553
554 static void emit_iabs(const struct lp_build_tgsi_action *action,
555 struct lp_build_tgsi_context *bld_base,
556 struct lp_build_emit_data *emit_data)
557 {
558 struct si_shader_context *ctx = si_shader_context(bld_base);
559
560 emit_data->output[emit_data->chan] =
561 ac_build_imax(&ctx->ac, emit_data->args[0],
562 LLVMBuildNeg(ctx->ac.builder, emit_data->args[0], ""));
563 }
564
565 static void emit_minmax_int(const struct lp_build_tgsi_action *action,
566 struct lp_build_tgsi_context *bld_base,
567 struct lp_build_emit_data *emit_data)
568 {
569 struct si_shader_context *ctx = si_shader_context(bld_base);
570 LLVMIntPredicate op;
571
572 switch (emit_data->info->opcode) {
573 default:
574 assert(0);
575 case TGSI_OPCODE_IMAX:
576 case TGSI_OPCODE_I64MAX:
577 op = LLVMIntSGT;
578 break;
579 case TGSI_OPCODE_IMIN:
580 case TGSI_OPCODE_I64MIN:
581 op = LLVMIntSLT;
582 break;
583 case TGSI_OPCODE_UMAX:
584 case TGSI_OPCODE_U64MAX:
585 op = LLVMIntUGT;
586 break;
587 case TGSI_OPCODE_UMIN:
588 case TGSI_OPCODE_U64MIN:
589 op = LLVMIntULT;
590 break;
591 }
592
593 emit_data->output[emit_data->chan] =
594 LLVMBuildSelect(ctx->ac.builder,
595 LLVMBuildICmp(ctx->ac.builder, op, emit_data->args[0],
596 emit_data->args[1], ""),
597 emit_data->args[0],
598 emit_data->args[1], "");
599 }
600
601 static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base,
602 struct lp_build_emit_data *emit_data)
603 {
604 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
605 0, TGSI_CHAN_X);
606 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
607 0, TGSI_CHAN_Y);
608 }
609
610 static void emit_pk2h(const struct lp_build_tgsi_action *action,
611 struct lp_build_tgsi_context *bld_base,
612 struct lp_build_emit_data *emit_data)
613 {
614 /* From the GLSL 4.50 spec:
615 * "The rounding mode cannot be set and is undefined."
616 *
617 * v_cvt_pkrtz_f16 rounds to zero, but it's fastest.
618 */
619 emit_data->output[emit_data->chan] =
620 ac_build_cvt_pkrtz_f16(&si_shader_context(bld_base)->ac,
621 emit_data->args);
622 }
623
624 static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base,
625 struct lp_build_emit_data *emit_data)
626 {
627 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
628 0, TGSI_CHAN_X);
629 }
630
631 static void emit_up2h(const struct lp_build_tgsi_action *action,
632 struct lp_build_tgsi_context *bld_base,
633 struct lp_build_emit_data *emit_data)
634 {
635 struct si_shader_context *ctx = si_shader_context(bld_base);
636 LLVMTypeRef i16;
637 LLVMValueRef const16, input, val;
638 unsigned i;
639
640 i16 = LLVMInt16TypeInContext(ctx->ac.context);
641 const16 = LLVMConstInt(ctx->i32, 16, 0);
642 input = emit_data->args[0];
643
644 for (i = 0; i < 2; i++) {
645 val = i == 1 ? LLVMBuildLShr(ctx->ac.builder, input, const16, "") : input;
646 val = LLVMBuildTrunc(ctx->ac.builder, val, i16, "");
647 val = ac_to_float(&ctx->ac, val);
648 emit_data->output[i] = LLVMBuildFPExt(ctx->ac.builder, val, ctx->f32, "");
649 }
650 }
651
652 static void emit_fdiv(const struct lp_build_tgsi_action *action,
653 struct lp_build_tgsi_context *bld_base,
654 struct lp_build_emit_data *emit_data)
655 {
656 struct si_shader_context *ctx = si_shader_context(bld_base);
657
658 emit_data->output[emit_data->chan] =
659 ac_build_fdiv(&ctx->ac, emit_data->args[0], emit_data->args[1]);
660 }
661
662 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
663 * the target machine. f64 needs global unsafe math flags to get rsq. */
664 static void emit_rsq(const struct lp_build_tgsi_action *action,
665 struct lp_build_tgsi_context *bld_base,
666 struct lp_build_emit_data *emit_data)
667 {
668 struct si_shader_context *ctx = si_shader_context(bld_base);
669
670 LLVMValueRef sqrt =
671 ac_build_intrinsic(&ctx->ac, "llvm.sqrt.f32", ctx->f32,
672 &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE);
673
674 emit_data->output[emit_data->chan] =
675 ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, sqrt);
676 }
677
678 static void dfracexp_fetch_args(struct lp_build_tgsi_context *bld_base,
679 struct lp_build_emit_data *emit_data)
680 {
681 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
682 emit_data->arg_count = 1;
683 }
684
685 static void dfracexp_emit(const struct lp_build_tgsi_action *action,
686 struct lp_build_tgsi_context *bld_base,
687 struct lp_build_emit_data *emit_data)
688 {
689 struct si_shader_context *ctx = si_shader_context(bld_base);
690
691 emit_data->output[emit_data->chan] =
692 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.mant.f64",
693 ctx->ac.f64, &emit_data->args[0], 1, 0);
694 emit_data->output1[emit_data->chan] =
695 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.exp.i32.f64",
696 ctx->ac.i32, &emit_data->args[0], 1, 0);
697 }
698
699 void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
700 {
701 lp_set_default_actions(bld_base);
702
703 bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
704 bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
705 bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
706 bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
707 bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.bitreverse.i32";
708 bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
709 bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
710 bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
711 bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
712 bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
713 bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
714 bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
715 bld_base->op_actions[TGSI_OPCODE_DCEIL].emit = build_tgsi_intrinsic_nomem;
716 bld_base->op_actions[TGSI_OPCODE_DCEIL].intr_name = "llvm.ceil.f64";
717 bld_base->op_actions[TGSI_OPCODE_DFLR].emit = build_tgsi_intrinsic_nomem;
718 bld_base->op_actions[TGSI_OPCODE_DFLR].intr_name = "llvm.floor.f64";
719 bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
720 bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
721 bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
722 bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
723 bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
724 bld_base->op_actions[TGSI_OPCODE_DROUND].emit = build_tgsi_intrinsic_nomem;
725 bld_base->op_actions[TGSI_OPCODE_DROUND].intr_name = "llvm.rint.f64";
726 bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
727 bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
728 bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
729 bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
730 bld_base->op_actions[TGSI_OPCODE_DSSG].emit = emit_ssg;
731 bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
732 bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.amdgcn.rsq.f64";
733 bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
734 bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
735 bld_base->op_actions[TGSI_OPCODE_DTRUNC].emit = build_tgsi_intrinsic_nomem;
736 bld_base->op_actions[TGSI_OPCODE_DTRUNC].intr_name = "llvm.trunc.f64";
737 bld_base->op_actions[TGSI_OPCODE_DFRACEXP].fetch_args = dfracexp_fetch_args;
738 bld_base->op_actions[TGSI_OPCODE_DFRACEXP].emit = dfracexp_emit;
739 bld_base->op_actions[TGSI_OPCODE_DLDEXP].emit = build_tgsi_intrinsic_nomem;
740 bld_base->op_actions[TGSI_OPCODE_DLDEXP].intr_name = "llvm.amdgcn.ldexp.f64";
741 bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
742 bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
743 bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
744 bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
745 bld_base->op_actions[TGSI_OPCODE_FMA].emit =
746 bld_base->op_actions[TGSI_OPCODE_MAD].emit;
747 bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
748 bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
749 bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
750 bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
751 bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
752 bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
753 bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
754 bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
755 bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
756 bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
757 bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
758 bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
759 bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
760 bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
761 bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
762 bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
763 bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
764 bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
765 bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
766 bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
767 bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
768 bld_base->op_actions[TGSI_OPCODE_KILL].emit = kil_emit;
769 bld_base->op_actions[TGSI_OPCODE_LDEXP].emit = build_tgsi_intrinsic_nomem;
770 bld_base->op_actions[TGSI_OPCODE_LDEXP].intr_name = "llvm.amdgcn.ldexp.f32";
771 bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
772 bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
773 bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
774 bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
775 bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
776 bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
777 bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
778 bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
779 bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
780 bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
781 bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
782 bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
783 bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
784 bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
785 bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
786 bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
787 bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
788 bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
789 bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
790 bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
791 bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
792 bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
793 bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
794 bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
795 bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
796 bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
797 bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
798 bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
799 bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
800 bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
801 bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
802 bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
803 bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
804 bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
805 bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
806 bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
807 bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
808 bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
809 bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
810 bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
811 bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
812 bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
813 bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
814 bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
815 bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
816 bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
817 bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
818 bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
819 bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
820 bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
821
822 bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
823 bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
824 bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
825 bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
826 bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
827 bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
828 bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;
829
830 bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
831 bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
832 bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
833 bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
834 bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
835 bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;
836
837 bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
838 bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
839 bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
840 bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
841
842 bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
843 bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
844 bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
845 bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
846 }