/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
24 #include "si_shader_internal.h"
25 #include "gallivm/lp_bld_const.h"
26 #include "gallivm/lp_bld_intr.h"
27 #include "gallivm/lp_bld_gather.h"
28 #include "tgsi/tgsi_parse.h"
30 static void kill_if_fetch_args(struct lp_build_tgsi_context
*bld_base
,
31 struct lp_build_emit_data
*emit_data
)
33 const struct tgsi_full_instruction
*inst
= emit_data
->inst
;
34 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
35 LLVMBuilderRef builder
= gallivm
->builder
;
37 LLVMValueRef conds
[TGSI_NUM_CHANNELS
];
39 for (i
= 0; i
< TGSI_NUM_CHANNELS
; i
++) {
40 LLVMValueRef value
= lp_build_emit_fetch(bld_base
, inst
, 0, i
);
41 conds
[i
] = LLVMBuildFCmp(builder
, LLVMRealOLT
, value
,
42 bld_base
->base
.zero
, "");
45 /* Or the conditions together */
46 for (i
= TGSI_NUM_CHANNELS
- 1; i
> 0; i
--) {
47 conds
[i
- 1] = LLVMBuildOr(builder
, conds
[i
], conds
[i
- 1], "");
50 emit_data
->dst_type
= LLVMVoidTypeInContext(gallivm
->context
);
51 emit_data
->arg_count
= 1;
52 emit_data
->args
[0] = LLVMBuildSelect(builder
, conds
[0],
53 lp_build_const_float(gallivm
, -1.0f
),
54 bld_base
->base
.zero
, "");
57 static void kil_emit(const struct lp_build_tgsi_action
*action
,
58 struct lp_build_tgsi_context
*bld_base
,
59 struct lp_build_emit_data
*emit_data
)
62 for (i
= 0; i
< emit_data
->arg_count
; i
++) {
63 emit_data
->output
[i
] = lp_build_intrinsic_unary(
64 bld_base
->base
.gallivm
->builder
,
66 emit_data
->dst_type
, emit_data
->args
[i
]);
70 static void emit_icmp(const struct lp_build_tgsi_action
*action
,
71 struct lp_build_tgsi_context
*bld_base
,
72 struct lp_build_emit_data
*emit_data
)
75 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
76 LLVMContextRef context
= bld_base
->base
.gallivm
->context
;
78 switch (emit_data
->inst
->Instruction
.Opcode
) {
79 case TGSI_OPCODE_USEQ
:
80 case TGSI_OPCODE_U64SEQ
: pred
= LLVMIntEQ
; break;
81 case TGSI_OPCODE_USNE
:
82 case TGSI_OPCODE_U64SNE
: pred
= LLVMIntNE
; break;
83 case TGSI_OPCODE_USGE
:
84 case TGSI_OPCODE_U64SGE
: pred
= LLVMIntUGE
; break;
85 case TGSI_OPCODE_USLT
:
86 case TGSI_OPCODE_U64SLT
: pred
= LLVMIntULT
; break;
87 case TGSI_OPCODE_ISGE
:
88 case TGSI_OPCODE_I64SGE
: pred
= LLVMIntSGE
; break;
89 case TGSI_OPCODE_ISLT
:
90 case TGSI_OPCODE_I64SLT
: pred
= LLVMIntSLT
; break;
92 assert(!"unknown instruction");
97 LLVMValueRef v
= LLVMBuildICmp(builder
, pred
,
98 emit_data
->args
[0], emit_data
->args
[1],"");
100 v
= LLVMBuildSExtOrBitCast(builder
, v
,
101 LLVMInt32TypeInContext(context
), "");
103 emit_data
->output
[emit_data
->chan
] = v
;
106 static void emit_ucmp(const struct lp_build_tgsi_action
*action
,
107 struct lp_build_tgsi_context
*bld_base
,
108 struct lp_build_emit_data
*emit_data
)
110 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
112 LLVMValueRef arg0
= LLVMBuildBitCast(builder
, emit_data
->args
[0],
113 bld_base
->uint_bld
.elem_type
, "");
115 LLVMValueRef v
= LLVMBuildICmp(builder
, LLVMIntNE
, arg0
,
116 bld_base
->uint_bld
.zero
, "");
118 emit_data
->output
[emit_data
->chan
] =
119 LLVMBuildSelect(builder
, v
, emit_data
->args
[1], emit_data
->args
[2], "");
122 static void emit_cmp(const struct lp_build_tgsi_action
*action
,
123 struct lp_build_tgsi_context
*bld_base
,
124 struct lp_build_emit_data
*emit_data
)
126 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
127 LLVMValueRef cond
, *args
= emit_data
->args
;
129 cond
= LLVMBuildFCmp(builder
, LLVMRealOLT
, args
[0],
130 bld_base
->base
.zero
, "");
132 emit_data
->output
[emit_data
->chan
] =
133 LLVMBuildSelect(builder
, cond
, args
[1], args
[2], "");
136 static void emit_set_cond(const struct lp_build_tgsi_action
*action
,
137 struct lp_build_tgsi_context
*bld_base
,
138 struct lp_build_emit_data
*emit_data
)
140 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
141 LLVMRealPredicate pred
;
144 /* Use ordered for everything but NE (which is usual for
147 switch (emit_data
->inst
->Instruction
.Opcode
) {
148 case TGSI_OPCODE_SGE
: pred
= LLVMRealOGE
; break;
149 case TGSI_OPCODE_SEQ
: pred
= LLVMRealOEQ
; break;
150 case TGSI_OPCODE_SLE
: pred
= LLVMRealOLE
; break;
151 case TGSI_OPCODE_SLT
: pred
= LLVMRealOLT
; break;
152 case TGSI_OPCODE_SNE
: pred
= LLVMRealUNE
; break;
153 case TGSI_OPCODE_SGT
: pred
= LLVMRealOGT
; break;
154 default: assert(!"unknown instruction"); pred
= 0; break;
157 cond
= LLVMBuildFCmp(builder
,
158 pred
, emit_data
->args
[0], emit_data
->args
[1], "");
160 emit_data
->output
[emit_data
->chan
] = LLVMBuildSelect(builder
,
161 cond
, bld_base
->base
.one
, bld_base
->base
.zero
, "");
164 static void emit_fcmp(const struct lp_build_tgsi_action
*action
,
165 struct lp_build_tgsi_context
*bld_base
,
166 struct lp_build_emit_data
*emit_data
)
168 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
169 LLVMContextRef context
= bld_base
->base
.gallivm
->context
;
170 LLVMRealPredicate pred
;
172 /* Use ordered for everything but NE (which is usual for
175 switch (emit_data
->inst
->Instruction
.Opcode
) {
176 case TGSI_OPCODE_FSEQ
: pred
= LLVMRealOEQ
; break;
177 case TGSI_OPCODE_FSGE
: pred
= LLVMRealOGE
; break;
178 case TGSI_OPCODE_FSLT
: pred
= LLVMRealOLT
; break;
179 case TGSI_OPCODE_FSNE
: pred
= LLVMRealUNE
; break;
180 default: assert(!"unknown instruction"); pred
= 0; break;
183 LLVMValueRef v
= LLVMBuildFCmp(builder
, pred
,
184 emit_data
->args
[0], emit_data
->args
[1],"");
186 v
= LLVMBuildSExtOrBitCast(builder
, v
,
187 LLVMInt32TypeInContext(context
), "");
189 emit_data
->output
[emit_data
->chan
] = v
;
192 static void emit_dcmp(const struct lp_build_tgsi_action
*action
,
193 struct lp_build_tgsi_context
*bld_base
,
194 struct lp_build_emit_data
*emit_data
)
196 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
197 LLVMContextRef context
= bld_base
->base
.gallivm
->context
;
198 LLVMRealPredicate pred
;
200 /* Use ordered for everything but NE (which is usual for
203 switch (emit_data
->inst
->Instruction
.Opcode
) {
204 case TGSI_OPCODE_DSEQ
: pred
= LLVMRealOEQ
; break;
205 case TGSI_OPCODE_DSGE
: pred
= LLVMRealOGE
; break;
206 case TGSI_OPCODE_DSLT
: pred
= LLVMRealOLT
; break;
207 case TGSI_OPCODE_DSNE
: pred
= LLVMRealUNE
; break;
208 default: assert(!"unknown instruction"); pred
= 0; break;
211 LLVMValueRef v
= LLVMBuildFCmp(builder
, pred
,
212 emit_data
->args
[0], emit_data
->args
[1],"");
214 v
= LLVMBuildSExtOrBitCast(builder
, v
,
215 LLVMInt32TypeInContext(context
), "");
217 emit_data
->output
[emit_data
->chan
] = v
;
220 static void emit_not(const struct lp_build_tgsi_action
*action
,
221 struct lp_build_tgsi_context
*bld_base
,
222 struct lp_build_emit_data
*emit_data
)
224 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
225 LLVMValueRef v
= bitcast(bld_base
, TGSI_TYPE_UNSIGNED
,
227 emit_data
->output
[emit_data
->chan
] = LLVMBuildNot(builder
, v
, "");
230 static void emit_arl(const struct lp_build_tgsi_action
*action
,
231 struct lp_build_tgsi_context
*bld_base
,
232 struct lp_build_emit_data
*emit_data
)
234 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
235 LLVMValueRef floor_index
= lp_build_emit_llvm_unary(bld_base
, TGSI_OPCODE_FLR
, emit_data
->args
[0]);
236 emit_data
->output
[emit_data
->chan
] = LLVMBuildFPToSI(builder
,
237 floor_index
, bld_base
->base
.int_elem_type
, "");
240 static void emit_and(const struct lp_build_tgsi_action
*action
,
241 struct lp_build_tgsi_context
*bld_base
,
242 struct lp_build_emit_data
*emit_data
)
244 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
245 emit_data
->output
[emit_data
->chan
] = LLVMBuildAnd(builder
,
246 emit_data
->args
[0], emit_data
->args
[1], "");
249 static void emit_or(const struct lp_build_tgsi_action
*action
,
250 struct lp_build_tgsi_context
*bld_base
,
251 struct lp_build_emit_data
*emit_data
)
253 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
254 emit_data
->output
[emit_data
->chan
] = LLVMBuildOr(builder
,
255 emit_data
->args
[0], emit_data
->args
[1], "");
258 static void emit_uadd(const struct lp_build_tgsi_action
*action
,
259 struct lp_build_tgsi_context
*bld_base
,
260 struct lp_build_emit_data
*emit_data
)
262 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
263 emit_data
->output
[emit_data
->chan
] = LLVMBuildAdd(builder
,
264 emit_data
->args
[0], emit_data
->args
[1], "");
267 static void emit_udiv(const struct lp_build_tgsi_action
*action
,
268 struct lp_build_tgsi_context
*bld_base
,
269 struct lp_build_emit_data
*emit_data
)
271 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
272 emit_data
->output
[emit_data
->chan
] = LLVMBuildUDiv(builder
,
273 emit_data
->args
[0], emit_data
->args
[1], "");
276 static void emit_idiv(const struct lp_build_tgsi_action
*action
,
277 struct lp_build_tgsi_context
*bld_base
,
278 struct lp_build_emit_data
*emit_data
)
280 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
281 emit_data
->output
[emit_data
->chan
] = LLVMBuildSDiv(builder
,
282 emit_data
->args
[0], emit_data
->args
[1], "");
285 static void emit_mod(const struct lp_build_tgsi_action
*action
,
286 struct lp_build_tgsi_context
*bld_base
,
287 struct lp_build_emit_data
*emit_data
)
289 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
290 emit_data
->output
[emit_data
->chan
] = LLVMBuildSRem(builder
,
291 emit_data
->args
[0], emit_data
->args
[1], "");
294 static void emit_umod(const struct lp_build_tgsi_action
*action
,
295 struct lp_build_tgsi_context
*bld_base
,
296 struct lp_build_emit_data
*emit_data
)
298 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
299 emit_data
->output
[emit_data
->chan
] = LLVMBuildURem(builder
,
300 emit_data
->args
[0], emit_data
->args
[1], "");
303 static void emit_shl(const struct lp_build_tgsi_action
*action
,
304 struct lp_build_tgsi_context
*bld_base
,
305 struct lp_build_emit_data
*emit_data
)
307 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
308 emit_data
->output
[emit_data
->chan
] = LLVMBuildShl(builder
,
309 emit_data
->args
[0], emit_data
->args
[1], "");
312 static void emit_ushr(const struct lp_build_tgsi_action
*action
,
313 struct lp_build_tgsi_context
*bld_base
,
314 struct lp_build_emit_data
*emit_data
)
316 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
317 emit_data
->output
[emit_data
->chan
] = LLVMBuildLShr(builder
,
318 emit_data
->args
[0], emit_data
->args
[1], "");
320 static void emit_ishr(const struct lp_build_tgsi_action
*action
,
321 struct lp_build_tgsi_context
*bld_base
,
322 struct lp_build_emit_data
*emit_data
)
324 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
325 emit_data
->output
[emit_data
->chan
] = LLVMBuildAShr(builder
,
326 emit_data
->args
[0], emit_data
->args
[1], "");
329 static void emit_xor(const struct lp_build_tgsi_action
*action
,
330 struct lp_build_tgsi_context
*bld_base
,
331 struct lp_build_emit_data
*emit_data
)
333 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
334 emit_data
->output
[emit_data
->chan
] = LLVMBuildXor(builder
,
335 emit_data
->args
[0], emit_data
->args
[1], "");
338 static void emit_ssg(const struct lp_build_tgsi_action
*action
,
339 struct lp_build_tgsi_context
*bld_base
,
340 struct lp_build_emit_data
*emit_data
)
342 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
344 LLVMValueRef cmp
, val
;
346 if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_I64SSG
) {
347 cmp
= LLVMBuildICmp(builder
, LLVMIntSGT
, emit_data
->args
[0], bld_base
->int64_bld
.zero
, "");
348 val
= LLVMBuildSelect(builder
, cmp
, bld_base
->int64_bld
.one
, emit_data
->args
[0], "");
349 cmp
= LLVMBuildICmp(builder
, LLVMIntSGE
, val
, bld_base
->int64_bld
.zero
, "");
350 val
= LLVMBuildSelect(builder
, cmp
, val
, LLVMConstInt(bld_base
->int64_bld
.elem_type
, -1, true), "");
351 } else if (emit_data
->inst
->Instruction
.Opcode
== TGSI_OPCODE_ISSG
) {
352 cmp
= LLVMBuildICmp(builder
, LLVMIntSGT
, emit_data
->args
[0], bld_base
->int_bld
.zero
, "");
353 val
= LLVMBuildSelect(builder
, cmp
, bld_base
->int_bld
.one
, emit_data
->args
[0], "");
354 cmp
= LLVMBuildICmp(builder
, LLVMIntSGE
, val
, bld_base
->int_bld
.zero
, "");
355 val
= LLVMBuildSelect(builder
, cmp
, val
, LLVMConstInt(bld_base
->int_bld
.elem_type
, -1, true), "");
356 } else { // float SSG
357 cmp
= LLVMBuildFCmp(builder
, LLVMRealOGT
, emit_data
->args
[0], bld_base
->base
.zero
, "");
358 val
= LLVMBuildSelect(builder
, cmp
, bld_base
->base
.one
, emit_data
->args
[0], "");
359 cmp
= LLVMBuildFCmp(builder
, LLVMRealOGE
, val
, bld_base
->base
.zero
, "");
360 val
= LLVMBuildSelect(builder
, cmp
, val
, LLVMConstReal(bld_base
->base
.elem_type
, -1), "");
363 emit_data
->output
[emit_data
->chan
] = val
;
366 static void emit_ineg(const struct lp_build_tgsi_action
*action
,
367 struct lp_build_tgsi_context
*bld_base
,
368 struct lp_build_emit_data
*emit_data
)
370 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
371 emit_data
->output
[emit_data
->chan
] = LLVMBuildNeg(builder
,
372 emit_data
->args
[0], "");
375 static void emit_dneg(const struct lp_build_tgsi_action
*action
,
376 struct lp_build_tgsi_context
*bld_base
,
377 struct lp_build_emit_data
*emit_data
)
379 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
380 emit_data
->output
[emit_data
->chan
] = LLVMBuildFNeg(builder
,
381 emit_data
->args
[0], "");
384 static void emit_frac(const struct lp_build_tgsi_action
*action
,
385 struct lp_build_tgsi_context
*bld_base
,
386 struct lp_build_emit_data
*emit_data
)
388 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
391 if (emit_data
->info
->opcode
== TGSI_OPCODE_FRC
)
392 intr
= "llvm.floor.f32";
393 else if (emit_data
->info
->opcode
== TGSI_OPCODE_DFRAC
)
394 intr
= "llvm.floor.f64";
400 LLVMValueRef floor
= lp_build_intrinsic(builder
, intr
, emit_data
->dst_type
,
401 &emit_data
->args
[0], 1,
402 LP_FUNC_ATTR_READNONE
);
403 emit_data
->output
[emit_data
->chan
] = LLVMBuildFSub(builder
,
404 emit_data
->args
[0], floor
, "");
407 static void emit_f2i(const struct lp_build_tgsi_action
*action
,
408 struct lp_build_tgsi_context
*bld_base
,
409 struct lp_build_emit_data
*emit_data
)
411 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
412 emit_data
->output
[emit_data
->chan
] = LLVMBuildFPToSI(builder
,
413 emit_data
->args
[0], bld_base
->int_bld
.elem_type
, "");
416 static void emit_f2u(const struct lp_build_tgsi_action
*action
,
417 struct lp_build_tgsi_context
*bld_base
,
418 struct lp_build_emit_data
*emit_data
)
420 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
421 emit_data
->output
[emit_data
->chan
] = LLVMBuildFPToUI(builder
,
422 emit_data
->args
[0], bld_base
->uint_bld
.elem_type
, "");
425 static void emit_i2f(const struct lp_build_tgsi_action
*action
,
426 struct lp_build_tgsi_context
*bld_base
,
427 struct lp_build_emit_data
*emit_data
)
429 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
430 emit_data
->output
[emit_data
->chan
] = LLVMBuildSIToFP(builder
,
431 emit_data
->args
[0], bld_base
->base
.elem_type
, "");
434 static void emit_u2f(const struct lp_build_tgsi_action
*action
,
435 struct lp_build_tgsi_context
*bld_base
,
436 struct lp_build_emit_data
*emit_data
)
438 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
439 emit_data
->output
[emit_data
->chan
] = LLVMBuildUIToFP(builder
,
440 emit_data
->args
[0], bld_base
->base
.elem_type
, "");
444 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action
*action
,
445 struct lp_build_tgsi_context
*bld_base
,
446 struct lp_build_emit_data
*emit_data
)
448 struct lp_build_context
*base
= &bld_base
->base
;
449 emit_data
->output
[emit_data
->chan
] =
450 lp_build_intrinsic(base
->gallivm
->builder
, action
->intr_name
,
451 emit_data
->dst_type
, emit_data
->args
,
452 emit_data
->arg_count
, LP_FUNC_ATTR_READNONE
);
455 static void emit_bfi(const struct lp_build_tgsi_action
*action
,
456 struct lp_build_tgsi_context
*bld_base
,
457 struct lp_build_emit_data
*emit_data
)
459 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
460 LLVMBuilderRef builder
= gallivm
->builder
;
461 LLVMValueRef bfi_args
[3];
462 LLVMValueRef bfi_sm5
;
465 // Calculate the bitmask: (((1 << src3) - 1) << src2
466 bfi_args
[0] = LLVMBuildShl(builder
,
467 LLVMBuildSub(builder
,
468 LLVMBuildShl(builder
,
469 bld_base
->int_bld
.one
,
470 emit_data
->args
[3], ""),
471 bld_base
->int_bld
.one
, ""),
472 emit_data
->args
[2], "");
474 bfi_args
[1] = LLVMBuildShl(builder
, emit_data
->args
[1],
475 emit_data
->args
[2], "");
477 bfi_args
[2] = emit_data
->args
[0];
480 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
481 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
484 LLVMBuildXor(builder
, bfi_args
[2],
485 LLVMBuildAnd(builder
, bfi_args
[0],
486 LLVMBuildXor(builder
, bfi_args
[1], bfi_args
[2],
489 /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
490 * uses the convenient V_BFI lowering for the above, which follows SM5
491 * and disagrees with GLSL semantics when bits (src3) is 32.
493 cond
= LLVMBuildICmp(builder
, LLVMIntUGE
, emit_data
->args
[3],
494 lp_build_const_int32(gallivm
, 32), "");
495 emit_data
->output
[emit_data
->chan
] =
496 LLVMBuildSelect(builder
, cond
, emit_data
->args
[1], bfi_sm5
, "");
499 static void emit_bfe(const struct lp_build_tgsi_action
*action
,
500 struct lp_build_tgsi_context
*bld_base
,
501 struct lp_build_emit_data
*emit_data
)
503 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
504 LLVMBuilderRef builder
= gallivm
->builder
;
505 LLVMValueRef bfe_sm5
;
508 bfe_sm5
= lp_build_intrinsic(builder
, action
->intr_name
,
509 emit_data
->dst_type
, emit_data
->args
,
510 emit_data
->arg_count
, LP_FUNC_ATTR_READNONE
);
512 /* Correct for GLSL semantics. */
513 cond
= LLVMBuildICmp(builder
, LLVMIntUGE
, emit_data
->args
[2],
514 lp_build_const_int32(gallivm
, 32), "");
515 emit_data
->output
[emit_data
->chan
] =
516 LLVMBuildSelect(builder
, cond
, emit_data
->args
[0], bfe_sm5
, "");
519 /* this is ffs in C */
520 static void emit_lsb(const struct lp_build_tgsi_action
*action
,
521 struct lp_build_tgsi_context
*bld_base
,
522 struct lp_build_emit_data
*emit_data
)
524 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
525 LLVMBuilderRef builder
= gallivm
->builder
;
526 LLVMValueRef args
[2] = {
529 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
530 * add special code to check for x=0. The reason is that
531 * the LLVM behavior for x=0 is different from what we
532 * need here. However, LLVM also assumes that ffs(x) is
533 * in [0, 31], but GLSL expects that ffs(0) = -1, so
534 * a conditional assignment to handle 0 is still required.
536 LLVMConstInt(LLVMInt1TypeInContext(gallivm
->context
), 1, 0)
540 lp_build_intrinsic(gallivm
->builder
, "llvm.cttz.i32",
541 emit_data
->dst_type
, args
, ARRAY_SIZE(args
),
542 LP_FUNC_ATTR_READNONE
);
544 /* TODO: We need an intrinsic to skip this conditional. */
545 /* Check for zero: */
546 emit_data
->output
[emit_data
->chan
] =
547 LLVMBuildSelect(builder
,
548 LLVMBuildICmp(builder
, LLVMIntEQ
, args
[0],
549 bld_base
->uint_bld
.zero
, ""),
550 lp_build_const_int32(gallivm
, -1), lsb
, "");
553 /* Find the last bit set. */
554 static void emit_umsb(const struct lp_build_tgsi_action
*action
,
555 struct lp_build_tgsi_context
*bld_base
,
556 struct lp_build_emit_data
*emit_data
)
558 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
559 LLVMBuilderRef builder
= gallivm
->builder
;
560 LLVMValueRef args
[2] = {
562 /* Don't generate code for handling zero: */
563 LLVMConstInt(LLVMInt1TypeInContext(gallivm
->context
), 1, 0)
567 lp_build_intrinsic(builder
, "llvm.ctlz.i32",
568 emit_data
->dst_type
, args
, ARRAY_SIZE(args
),
569 LP_FUNC_ATTR_READNONE
);
571 /* The HW returns the last bit index from MSB, but TGSI wants
572 * the index from LSB. Invert it by doing "31 - msb". */
573 msb
= LLVMBuildSub(builder
, lp_build_const_int32(gallivm
, 31),
576 /* Check for zero: */
577 emit_data
->output
[emit_data
->chan
] =
578 LLVMBuildSelect(builder
,
579 LLVMBuildICmp(builder
, LLVMIntEQ
, args
[0],
580 bld_base
->uint_bld
.zero
, ""),
581 lp_build_const_int32(gallivm
, -1), msb
, "");
584 /* Find the last bit opposite of the sign bit. */
585 static void emit_imsb(const struct lp_build_tgsi_action
*action
,
586 struct lp_build_tgsi_context
*bld_base
,
587 struct lp_build_emit_data
*emit_data
)
589 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
590 LLVMBuilderRef builder
= gallivm
->builder
;
591 LLVMValueRef arg
= emit_data
->args
[0];
594 lp_build_intrinsic(builder
, "llvm.AMDGPU.flbit.i32",
595 emit_data
->dst_type
, &arg
, 1,
596 LP_FUNC_ATTR_READNONE
);
598 /* The HW returns the last bit index from MSB, but TGSI wants
599 * the index from LSB. Invert it by doing "31 - msb". */
600 msb
= LLVMBuildSub(builder
, lp_build_const_int32(gallivm
, 31),
603 /* If arg == 0 || arg == -1 (0xffffffff), return -1. */
604 LLVMValueRef all_ones
= lp_build_const_int32(gallivm
, -1);
608 LLVMBuildICmp(builder
, LLVMIntEQ
, arg
,
609 bld_base
->uint_bld
.zero
, ""),
610 LLVMBuildICmp(builder
, LLVMIntEQ
, arg
,
613 emit_data
->output
[emit_data
->chan
] =
614 LLVMBuildSelect(builder
, cond
, all_ones
, msb
, "");
617 static void emit_iabs(const struct lp_build_tgsi_action
*action
,
618 struct lp_build_tgsi_context
*bld_base
,
619 struct lp_build_emit_data
*emit_data
)
621 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
623 emit_data
->output
[emit_data
->chan
] =
624 lp_build_emit_llvm_binary(bld_base
, TGSI_OPCODE_IMAX
,
626 LLVMBuildNeg(builder
,
627 emit_data
->args
[0], ""));
630 static void emit_minmax_int(const struct lp_build_tgsi_action
*action
,
631 struct lp_build_tgsi_context
*bld_base
,
632 struct lp_build_emit_data
*emit_data
)
634 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
637 switch (emit_data
->info
->opcode
) {
640 case TGSI_OPCODE_IMAX
:
641 case TGSI_OPCODE_I64MAX
:
644 case TGSI_OPCODE_IMIN
:
645 case TGSI_OPCODE_I64MIN
:
648 case TGSI_OPCODE_UMAX
:
649 case TGSI_OPCODE_U64MAX
:
652 case TGSI_OPCODE_UMIN
:
653 case TGSI_OPCODE_U64MIN
:
658 emit_data
->output
[emit_data
->chan
] =
659 LLVMBuildSelect(builder
,
660 LLVMBuildICmp(builder
, op
, emit_data
->args
[0],
661 emit_data
->args
[1], ""),
663 emit_data
->args
[1], "");
666 static void pk2h_fetch_args(struct lp_build_tgsi_context
*bld_base
,
667 struct lp_build_emit_data
*emit_data
)
669 emit_data
->args
[0] = lp_build_emit_fetch(bld_base
, emit_data
->inst
,
671 emit_data
->args
[1] = lp_build_emit_fetch(bld_base
, emit_data
->inst
,
675 static void emit_pk2h(const struct lp_build_tgsi_action
*action
,
676 struct lp_build_tgsi_context
*bld_base
,
677 struct lp_build_emit_data
*emit_data
)
679 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
680 LLVMContextRef context
= bld_base
->base
.gallivm
->context
;
681 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
682 LLVMTypeRef fp16
, i16
;
683 LLVMValueRef const16
, comp
[2];
686 fp16
= LLVMHalfTypeInContext(context
);
687 i16
= LLVMInt16TypeInContext(context
);
688 const16
= lp_build_const_int32(uint_bld
->gallivm
, 16);
690 for (i
= 0; i
< 2; i
++) {
691 comp
[i
] = LLVMBuildFPTrunc(builder
, emit_data
->args
[i
], fp16
, "");
692 comp
[i
] = LLVMBuildBitCast(builder
, comp
[i
], i16
, "");
693 comp
[i
] = LLVMBuildZExt(builder
, comp
[i
], uint_bld
->elem_type
, "");
696 comp
[1] = LLVMBuildShl(builder
, comp
[1], const16
, "");
697 comp
[0] = LLVMBuildOr(builder
, comp
[0], comp
[1], "");
699 emit_data
->output
[emit_data
->chan
] = comp
[0];
702 static void up2h_fetch_args(struct lp_build_tgsi_context
*bld_base
,
703 struct lp_build_emit_data
*emit_data
)
705 emit_data
->args
[0] = lp_build_emit_fetch(bld_base
, emit_data
->inst
,
709 static void emit_up2h(const struct lp_build_tgsi_action
*action
,
710 struct lp_build_tgsi_context
*bld_base
,
711 struct lp_build_emit_data
*emit_data
)
713 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
714 LLVMContextRef context
= bld_base
->base
.gallivm
->context
;
715 struct lp_build_context
*uint_bld
= &bld_base
->uint_bld
;
716 LLVMTypeRef fp16
, i16
;
717 LLVMValueRef const16
, input
, val
;
720 fp16
= LLVMHalfTypeInContext(context
);
721 i16
= LLVMInt16TypeInContext(context
);
722 const16
= lp_build_const_int32(uint_bld
->gallivm
, 16);
723 input
= emit_data
->args
[0];
725 for (i
= 0; i
< 2; i
++) {
726 val
= i
== 1 ? LLVMBuildLShr(builder
, input
, const16
, "") : input
;
727 val
= LLVMBuildTrunc(builder
, val
, i16
, "");
728 val
= LLVMBuildBitCast(builder
, val
, fp16
, "");
729 emit_data
->output
[i
] =
730 LLVMBuildFPExt(builder
, val
, bld_base
->base
.elem_type
, "");
734 static void emit_fdiv(const struct lp_build_tgsi_action
*action
,
735 struct lp_build_tgsi_context
*bld_base
,
736 struct lp_build_emit_data
*emit_data
)
738 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
740 emit_data
->output
[emit_data
->chan
] =
741 LLVMBuildFDiv(bld_base
->base
.gallivm
->builder
,
742 emit_data
->args
[0], emit_data
->args
[1], "");
744 /* Use v_rcp_f32 instead of precise division. */
745 if (HAVE_LLVM
>= 0x0309 &&
746 !LLVMIsConstant(emit_data
->output
[emit_data
->chan
]))
747 LLVMSetMetadata(emit_data
->output
[emit_data
->chan
],
748 ctx
->fpmath_md_kind
, ctx
->fpmath_md_2p5_ulp
);
751 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
752 * the target machine. f64 needs global unsafe math flags to get rsq. */
753 static void emit_rsq(const struct lp_build_tgsi_action
*action
,
754 struct lp_build_tgsi_context
*bld_base
,
755 struct lp_build_emit_data
*emit_data
)
758 lp_build_emit_llvm_unary(bld_base
, TGSI_OPCODE_SQRT
,
761 emit_data
->output
[emit_data
->chan
] =
762 lp_build_emit_llvm_binary(bld_base
, TGSI_OPCODE_DIV
,
763 bld_base
->base
.one
, sqrt
);
766 void si_shader_context_init_alu(struct lp_build_tgsi_context
*bld_base
)
768 lp_set_default_actions(bld_base
);
770 bld_base
->op_actions
[TGSI_OPCODE_AND
].emit
= emit_and
;
771 bld_base
->op_actions
[TGSI_OPCODE_ARL
].emit
= emit_arl
;
772 bld_base
->op_actions
[TGSI_OPCODE_BFI
].emit
= emit_bfi
;
773 bld_base
->op_actions
[TGSI_OPCODE_BREV
].emit
= build_tgsi_intrinsic_nomem
;
774 bld_base
->op_actions
[TGSI_OPCODE_BREV
].intr_name
=
775 HAVE_LLVM
>= 0x0308 ? "llvm.bitreverse.i32" : "llvm.AMDGPU.brev";
776 bld_base
->op_actions
[TGSI_OPCODE_CEIL
].emit
= build_tgsi_intrinsic_nomem
;
777 bld_base
->op_actions
[TGSI_OPCODE_CEIL
].intr_name
= "llvm.ceil.f32";
778 bld_base
->op_actions
[TGSI_OPCODE_CLAMP
].emit
= build_tgsi_intrinsic_nomem
;
779 bld_base
->op_actions
[TGSI_OPCODE_CLAMP
].intr_name
=
780 HAVE_LLVM
>= 0x0308 ? "llvm.AMDGPU.clamp." : "llvm.AMDIL.clamp.";
781 bld_base
->op_actions
[TGSI_OPCODE_CMP
].emit
= emit_cmp
;
782 bld_base
->op_actions
[TGSI_OPCODE_COS
].emit
= build_tgsi_intrinsic_nomem
;
783 bld_base
->op_actions
[TGSI_OPCODE_COS
].intr_name
= "llvm.cos.f32";
784 bld_base
->op_actions
[TGSI_OPCODE_DABS
].emit
= build_tgsi_intrinsic_nomem
;
785 bld_base
->op_actions
[TGSI_OPCODE_DABS
].intr_name
= "llvm.fabs.f64";
786 bld_base
->op_actions
[TGSI_OPCODE_DFMA
].emit
= build_tgsi_intrinsic_nomem
;
787 bld_base
->op_actions
[TGSI_OPCODE_DFMA
].intr_name
= "llvm.fma.f64";
788 bld_base
->op_actions
[TGSI_OPCODE_DFRAC
].emit
= emit_frac
;
789 bld_base
->op_actions
[TGSI_OPCODE_DIV
].emit
= emit_fdiv
;
790 bld_base
->op_actions
[TGSI_OPCODE_DNEG
].emit
= emit_dneg
;
791 bld_base
->op_actions
[TGSI_OPCODE_DSEQ
].emit
= emit_dcmp
;
792 bld_base
->op_actions
[TGSI_OPCODE_DSGE
].emit
= emit_dcmp
;
793 bld_base
->op_actions
[TGSI_OPCODE_DSLT
].emit
= emit_dcmp
;
794 bld_base
->op_actions
[TGSI_OPCODE_DSNE
].emit
= emit_dcmp
;
795 bld_base
->op_actions
[TGSI_OPCODE_DRSQ
].emit
= build_tgsi_intrinsic_nomem
;
796 bld_base
->op_actions
[TGSI_OPCODE_DRSQ
].intr_name
=
797 HAVE_LLVM
>= 0x0309 ? "llvm.amdgcn.rsq.f64" : "llvm.AMDGPU.rsq.f64";
798 bld_base
->op_actions
[TGSI_OPCODE_DSQRT
].emit
= build_tgsi_intrinsic_nomem
;
799 bld_base
->op_actions
[TGSI_OPCODE_DSQRT
].intr_name
= "llvm.sqrt.f64";
800 bld_base
->op_actions
[TGSI_OPCODE_EX2
].emit
= build_tgsi_intrinsic_nomem
;
801 bld_base
->op_actions
[TGSI_OPCODE_EX2
].intr_name
=
802 HAVE_LLVM
>= 0x0308 ? "llvm.exp2.f32" : "llvm.AMDIL.exp.";
803 bld_base
->op_actions
[TGSI_OPCODE_FLR
].emit
= build_tgsi_intrinsic_nomem
;
804 bld_base
->op_actions
[TGSI_OPCODE_FLR
].intr_name
= "llvm.floor.f32";
805 bld_base
->op_actions
[TGSI_OPCODE_FMA
].emit
=
806 bld_base
->op_actions
[TGSI_OPCODE_MAD
].emit
;
807 bld_base
->op_actions
[TGSI_OPCODE_FRC
].emit
= emit_frac
;
808 bld_base
->op_actions
[TGSI_OPCODE_F2I
].emit
= emit_f2i
;
809 bld_base
->op_actions
[TGSI_OPCODE_F2U
].emit
= emit_f2u
;
810 bld_base
->op_actions
[TGSI_OPCODE_FSEQ
].emit
= emit_fcmp
;
811 bld_base
->op_actions
[TGSI_OPCODE_FSGE
].emit
= emit_fcmp
;
812 bld_base
->op_actions
[TGSI_OPCODE_FSLT
].emit
= emit_fcmp
;
813 bld_base
->op_actions
[TGSI_OPCODE_FSNE
].emit
= emit_fcmp
;
814 bld_base
->op_actions
[TGSI_OPCODE_IABS
].emit
= emit_iabs
;
815 bld_base
->op_actions
[TGSI_OPCODE_IBFE
].emit
= emit_bfe
;
816 bld_base
->op_actions
[TGSI_OPCODE_IBFE
].intr_name
= "llvm.AMDGPU.bfe.i32";
817 bld_base
->op_actions
[TGSI_OPCODE_IDIV
].emit
= emit_idiv
;
818 bld_base
->op_actions
[TGSI_OPCODE_IMAX
].emit
= emit_minmax_int
;
819 bld_base
->op_actions
[TGSI_OPCODE_IMIN
].emit
= emit_minmax_int
;
820 bld_base
->op_actions
[TGSI_OPCODE_IMSB
].emit
= emit_imsb
;
821 bld_base
->op_actions
[TGSI_OPCODE_INEG
].emit
= emit_ineg
;
822 bld_base
->op_actions
[TGSI_OPCODE_ISHR
].emit
= emit_ishr
;
823 bld_base
->op_actions
[TGSI_OPCODE_ISGE
].emit
= emit_icmp
;
824 bld_base
->op_actions
[TGSI_OPCODE_ISLT
].emit
= emit_icmp
;
825 bld_base
->op_actions
[TGSI_OPCODE_ISSG
].emit
= emit_ssg
;
826 bld_base
->op_actions
[TGSI_OPCODE_I2F
].emit
= emit_i2f
;
827 bld_base
->op_actions
[TGSI_OPCODE_KILL_IF
].fetch_args
= kill_if_fetch_args
;
828 bld_base
->op_actions
[TGSI_OPCODE_KILL_IF
].emit
= kil_emit
;
829 bld_base
->op_actions
[TGSI_OPCODE_KILL_IF
].intr_name
= "llvm.AMDGPU.kill";
830 bld_base
->op_actions
[TGSI_OPCODE_KILL
].emit
= lp_build_tgsi_intrinsic
;
831 bld_base
->op_actions
[TGSI_OPCODE_KILL
].intr_name
= "llvm.AMDGPU.kilp";
832 bld_base
->op_actions
[TGSI_OPCODE_LSB
].emit
= emit_lsb
;
833 bld_base
->op_actions
[TGSI_OPCODE_LG2
].emit
= build_tgsi_intrinsic_nomem
;
834 bld_base
->op_actions
[TGSI_OPCODE_LG2
].intr_name
= "llvm.log2.f32";
835 bld_base
->op_actions
[TGSI_OPCODE_MAX
].emit
= build_tgsi_intrinsic_nomem
;
836 bld_base
->op_actions
[TGSI_OPCODE_MAX
].intr_name
= "llvm.maxnum.f32";
837 bld_base
->op_actions
[TGSI_OPCODE_MIN
].emit
= build_tgsi_intrinsic_nomem
;
838 bld_base
->op_actions
[TGSI_OPCODE_MIN
].intr_name
= "llvm.minnum.f32";
839 bld_base
->op_actions
[TGSI_OPCODE_MOD
].emit
= emit_mod
;
840 bld_base
->op_actions
[TGSI_OPCODE_UMSB
].emit
= emit_umsb
;
841 bld_base
->op_actions
[TGSI_OPCODE_NOT
].emit
= emit_not
;
842 bld_base
->op_actions
[TGSI_OPCODE_OR
].emit
= emit_or
;
843 bld_base
->op_actions
[TGSI_OPCODE_PK2H
].fetch_args
= pk2h_fetch_args
;
844 bld_base
->op_actions
[TGSI_OPCODE_PK2H
].emit
= emit_pk2h
;
845 bld_base
->op_actions
[TGSI_OPCODE_POPC
].emit
= build_tgsi_intrinsic_nomem
;
846 bld_base
->op_actions
[TGSI_OPCODE_POPC
].intr_name
= "llvm.ctpop.i32";
847 bld_base
->op_actions
[TGSI_OPCODE_POW
].emit
= build_tgsi_intrinsic_nomem
;
848 bld_base
->op_actions
[TGSI_OPCODE_POW
].intr_name
= "llvm.pow.f32";
849 bld_base
->op_actions
[TGSI_OPCODE_ROUND
].emit
= build_tgsi_intrinsic_nomem
;
850 bld_base
->op_actions
[TGSI_OPCODE_ROUND
].intr_name
= "llvm.rint.f32";
851 bld_base
->op_actions
[TGSI_OPCODE_RSQ
].emit
= emit_rsq
;
852 bld_base
->op_actions
[TGSI_OPCODE_SGE
].emit
= emit_set_cond
;
853 bld_base
->op_actions
[TGSI_OPCODE_SEQ
].emit
= emit_set_cond
;
854 bld_base
->op_actions
[TGSI_OPCODE_SHL
].emit
= emit_shl
;
855 bld_base
->op_actions
[TGSI_OPCODE_SLE
].emit
= emit_set_cond
;
856 bld_base
->op_actions
[TGSI_OPCODE_SLT
].emit
= emit_set_cond
;
857 bld_base
->op_actions
[TGSI_OPCODE_SNE
].emit
= emit_set_cond
;
858 bld_base
->op_actions
[TGSI_OPCODE_SGT
].emit
= emit_set_cond
;
859 bld_base
->op_actions
[TGSI_OPCODE_SIN
].emit
= build_tgsi_intrinsic_nomem
;
860 bld_base
->op_actions
[TGSI_OPCODE_SIN
].intr_name
= "llvm.sin.f32";
861 bld_base
->op_actions
[TGSI_OPCODE_SQRT
].emit
= build_tgsi_intrinsic_nomem
;
862 bld_base
->op_actions
[TGSI_OPCODE_SQRT
].intr_name
= "llvm.sqrt.f32";
863 bld_base
->op_actions
[TGSI_OPCODE_SSG
].emit
= emit_ssg
;
864 bld_base
->op_actions
[TGSI_OPCODE_TRUNC
].emit
= build_tgsi_intrinsic_nomem
;
865 bld_base
->op_actions
[TGSI_OPCODE_TRUNC
].intr_name
= "llvm.trunc.f32";
866 bld_base
->op_actions
[TGSI_OPCODE_UADD
].emit
= emit_uadd
;
867 bld_base
->op_actions
[TGSI_OPCODE_UBFE
].emit
= emit_bfe
;
868 bld_base
->op_actions
[TGSI_OPCODE_UBFE
].intr_name
= "llvm.AMDGPU.bfe.u32";
869 bld_base
->op_actions
[TGSI_OPCODE_UDIV
].emit
= emit_udiv
;
870 bld_base
->op_actions
[TGSI_OPCODE_UMAX
].emit
= emit_minmax_int
;
871 bld_base
->op_actions
[TGSI_OPCODE_UMIN
].emit
= emit_minmax_int
;
872 bld_base
->op_actions
[TGSI_OPCODE_UMOD
].emit
= emit_umod
;
873 bld_base
->op_actions
[TGSI_OPCODE_USEQ
].emit
= emit_icmp
;
874 bld_base
->op_actions
[TGSI_OPCODE_USGE
].emit
= emit_icmp
;
875 bld_base
->op_actions
[TGSI_OPCODE_USHR
].emit
= emit_ushr
;
876 bld_base
->op_actions
[TGSI_OPCODE_USLT
].emit
= emit_icmp
;
877 bld_base
->op_actions
[TGSI_OPCODE_USNE
].emit
= emit_icmp
;
878 bld_base
->op_actions
[TGSI_OPCODE_U2F
].emit
= emit_u2f
;
879 bld_base
->op_actions
[TGSI_OPCODE_XOR
].emit
= emit_xor
;
880 bld_base
->op_actions
[TGSI_OPCODE_UCMP
].emit
= emit_ucmp
;
881 bld_base
->op_actions
[TGSI_OPCODE_UP2H
].fetch_args
= up2h_fetch_args
;
882 bld_base
->op_actions
[TGSI_OPCODE_UP2H
].emit
= emit_up2h
;
884 bld_base
->op_actions
[TGSI_OPCODE_I64MAX
].emit
= emit_minmax_int
;
885 bld_base
->op_actions
[TGSI_OPCODE_I64MIN
].emit
= emit_minmax_int
;
886 bld_base
->op_actions
[TGSI_OPCODE_U64MAX
].emit
= emit_minmax_int
;
887 bld_base
->op_actions
[TGSI_OPCODE_U64MIN
].emit
= emit_minmax_int
;
888 bld_base
->op_actions
[TGSI_OPCODE_I64ABS
].emit
= emit_iabs
;
889 bld_base
->op_actions
[TGSI_OPCODE_I64SSG
].emit
= emit_ssg
;
890 bld_base
->op_actions
[TGSI_OPCODE_I64NEG
].emit
= emit_ineg
;
892 bld_base
->op_actions
[TGSI_OPCODE_U64SEQ
].emit
= emit_icmp
;
893 bld_base
->op_actions
[TGSI_OPCODE_U64SNE
].emit
= emit_icmp
;
894 bld_base
->op_actions
[TGSI_OPCODE_U64SGE
].emit
= emit_icmp
;
895 bld_base
->op_actions
[TGSI_OPCODE_U64SLT
].emit
= emit_icmp
;
896 bld_base
->op_actions
[TGSI_OPCODE_I64SGE
].emit
= emit_icmp
;
897 bld_base
->op_actions
[TGSI_OPCODE_I64SLT
].emit
= emit_icmp
;
899 bld_base
->op_actions
[TGSI_OPCODE_U64ADD
].emit
= emit_uadd
;
900 bld_base
->op_actions
[TGSI_OPCODE_U64SHL
].emit
= emit_shl
;
901 bld_base
->op_actions
[TGSI_OPCODE_U64SHR
].emit
= emit_ushr
;
902 bld_base
->op_actions
[TGSI_OPCODE_I64SHR
].emit
= emit_ishr
;
904 bld_base
->op_actions
[TGSI_OPCODE_U64MOD
].emit
= emit_umod
;
905 bld_base
->op_actions
[TGSI_OPCODE_I64MOD
].emit
= emit_mod
;
906 bld_base
->op_actions
[TGSI_OPCODE_U64DIV
].emit
= emit_udiv
;
907 bld_base
->op_actions
[TGSI_OPCODE_I64DIV
].emit
= emit_idiv
;
910 /* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
911 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
912 * already multiplied by two. id is the cube face number.
914 struct cube_selection_coords
{
920 static void build_cube_intrinsic(struct gallivm_state
*gallivm
,
922 struct cube_selection_coords
*out
)
924 LLVMBuilderRef builder
= gallivm
->builder
;
926 if (HAVE_LLVM
>= 0x0309) {
927 LLVMTypeRef f32
= LLVMTypeOf(in
[0]);
929 out
->stc
[1] = lp_build_intrinsic(builder
, "llvm.amdgcn.cubetc",
930 f32
, in
, 3, LP_FUNC_ATTR_READNONE
);
931 out
->stc
[0] = lp_build_intrinsic(builder
, "llvm.amdgcn.cubesc",
932 f32
, in
, 3, LP_FUNC_ATTR_READNONE
);
933 out
->ma
= lp_build_intrinsic(builder
, "llvm.amdgcn.cubema",
934 f32
, in
, 3, LP_FUNC_ATTR_READNONE
);
935 out
->id
= lp_build_intrinsic(builder
, "llvm.amdgcn.cubeid",
936 f32
, in
, 3, LP_FUNC_ATTR_READNONE
);
938 LLVMValueRef c
[4] = {
942 LLVMGetUndef(LLVMTypeOf(in
[0]))
944 LLVMValueRef vec
= lp_build_gather_values(gallivm
, c
, 4);
947 lp_build_intrinsic(builder
, "llvm.AMDGPU.cube",
948 LLVMTypeOf(vec
), &vec
, 1,
949 LP_FUNC_ATTR_READNONE
);
951 out
->stc
[1] = LLVMBuildExtractElement(builder
, tmp
,
952 lp_build_const_int32(gallivm
, 0), "");
953 out
->stc
[0] = LLVMBuildExtractElement(builder
, tmp
,
954 lp_build_const_int32(gallivm
, 1), "");
955 out
->ma
= LLVMBuildExtractElement(builder
, tmp
,
956 lp_build_const_int32(gallivm
, 2), "");
957 out
->id
= LLVMBuildExtractElement(builder
, tmp
,
958 lp_build_const_int32(gallivm
, 3), "");
963 * Build a manual selection sequence for cube face sc/tc coordinates and
964 * major axis vector (multiplied by 2 for consistency) for the given
965 * vec3 \p coords, for the face implied by \p selcoords.
967 * For the major axis, we always adjust the sign to be in the direction of
968 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
969 * the selcoords major axis.
971 static void build_cube_select(LLVMBuilderRef builder
,
972 const struct cube_selection_coords
*selcoords
,
973 const LLVMValueRef
*coords
,
974 LLVMValueRef
*out_st
,
975 LLVMValueRef
*out_ma
)
977 LLVMTypeRef f32
= LLVMTypeOf(coords
[0]);
978 LLVMValueRef is_ma_positive
;
980 LLVMValueRef is_ma_z
, is_not_ma_z
;
981 LLVMValueRef is_ma_y
;
982 LLVMValueRef is_ma_x
;
986 is_ma_positive
= LLVMBuildFCmp(builder
, LLVMRealUGE
,
987 selcoords
->ma
, LLVMConstReal(f32
, 0.0), "");
988 sgn_ma
= LLVMBuildSelect(builder
, is_ma_positive
,
989 LLVMConstReal(f32
, 1.0), LLVMConstReal(f32
, -1.0), "");
991 is_ma_z
= LLVMBuildFCmp(builder
, LLVMRealUGE
, selcoords
->id
, LLVMConstReal(f32
, 4.0), "");
992 is_not_ma_z
= LLVMBuildNot(builder
, is_ma_z
, "");
993 is_ma_y
= LLVMBuildAnd(builder
, is_not_ma_z
,
994 LLVMBuildFCmp(builder
, LLVMRealUGE
, selcoords
->id
, LLVMConstReal(f32
, 2.0), ""), "");
995 is_ma_x
= LLVMBuildAnd(builder
, is_not_ma_z
, LLVMBuildNot(builder
, is_ma_y
, ""), "");
998 tmp
= LLVMBuildSelect(builder
, is_ma_z
, coords
[2], coords
[0], "");
999 sgn
= LLVMBuildSelect(builder
, is_ma_y
, LLVMConstReal(f32
, 1.0),
1000 LLVMBuildSelect(builder
, is_ma_x
, sgn_ma
,
1001 LLVMBuildFNeg(builder
, sgn_ma
, ""), ""), "");
1002 out_st
[0] = LLVMBuildFMul(builder
, tmp
, sgn
, "");
1005 tmp
= LLVMBuildSelect(builder
, is_ma_y
, coords
[2], coords
[1], "");
1006 sgn
= LLVMBuildSelect(builder
, is_ma_y
, LLVMBuildFNeg(builder
, sgn_ma
, ""),
1007 LLVMConstReal(f32
, -1.0), "");
1008 out_st
[1] = LLVMBuildFMul(builder
, tmp
, sgn
, "");
1011 tmp
= LLVMBuildSelect(builder
, is_ma_z
, coords
[2],
1012 LLVMBuildSelect(builder
, is_ma_y
, coords
[1], coords
[0], ""), "");
1013 sgn
= LLVMBuildSelect(builder
, is_ma_positive
,
1014 LLVMConstReal(f32
, 2.0), LLVMConstReal(f32
, -2.0), "");
1015 *out_ma
= LLVMBuildFMul(builder
, tmp
, sgn
, "");
1018 void si_prepare_cube_coords(struct lp_build_tgsi_context
*bld_base
,
1019 struct lp_build_emit_data
*emit_data
,
1020 LLVMValueRef
*coords_arg
,
1021 LLVMValueRef
*derivs_arg
)
1024 unsigned target
= emit_data
->inst
->Texture
.Texture
;
1025 unsigned opcode
= emit_data
->inst
->Instruction
.Opcode
;
1026 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1027 LLVMBuilderRef builder
= gallivm
->builder
;
1028 LLVMTypeRef type
= bld_base
->base
.elem_type
;
1029 struct cube_selection_coords selcoords
;
1030 LLVMValueRef coords
[3];
1033 build_cube_intrinsic(gallivm
, coords_arg
, &selcoords
);
1035 invma
= lp_build_intrinsic(builder
, "llvm.fabs.f32",
1036 type
, &selcoords
.ma
, 1, LP_FUNC_ATTR_READNONE
);
1037 invma
= lp_build_emit_llvm_unary(bld_base
, TGSI_OPCODE_RCP
, invma
);
1039 for (int i
= 0; i
< 2; ++i
)
1040 coords
[i
] = LLVMBuildFMul(builder
, selcoords
.stc
[i
], invma
, "");
1042 coords
[2] = selcoords
.id
;
1044 if (opcode
== TGSI_OPCODE_TXD
&& derivs_arg
) {
1045 LLVMValueRef derivs
[4];
1048 /* Convert cube derivatives to 2D derivatives. */
1049 for (axis
= 0; axis
< 2; axis
++) {
1050 LLVMValueRef deriv_st
[2];
1051 LLVMValueRef deriv_ma
;
1053 /* Transform the derivative alongside the texture
1054 * coordinate. Mathematically, the correct formula is
1055 * as follows. Assume we're projecting onto the +Z face
1056 * and denote by dx/dh the derivative of the (original)
1057 * X texture coordinate with respect to horizontal
1058 * window coordinates. The projection onto the +Z face
1063 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
1064 * = 1/z * dx/dh - x/z * 1/z * dz/dh.
1066 * This motivatives the implementation below.
1068 * Whether this actually gives the expected results for
1069 * apps that might feed in derivatives obtained via
1070 * finite differences is anyone's guess. The OpenGL spec
1071 * seems awfully quiet about how textureGrad for cube
1072 * maps should be handled.
1074 build_cube_select(builder
, &selcoords
, &derivs_arg
[axis
* 3],
1075 deriv_st
, &deriv_ma
);
1077 deriv_ma
= LLVMBuildFMul(builder
, deriv_ma
, invma
, "");
1079 for (int i
= 0; i
< 2; ++i
)
1080 derivs
[axis
* 2 + i
] =
1081 LLVMBuildFSub(builder
,
1082 LLVMBuildFMul(builder
, deriv_st
[i
], invma
, ""),
1083 LLVMBuildFMul(builder
, deriv_ma
, coords
[i
], ""), "");
1086 memcpy(derivs_arg
, derivs
, sizeof(derivs
));
1089 /* Shift the texture coordinate. This must be applied after the
1090 * derivative calculation.
1092 for (int i
= 0; i
< 2; ++i
)
1093 coords
[i
] = LLVMBuildFAdd(builder
, coords
[i
], LLVMConstReal(type
, 1.5), "");
1095 if (target
== TGSI_TEXTURE_CUBE_ARRAY
||
1096 target
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) {
1097 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
1098 /* coords_arg.w component - array_index for cube arrays */
1099 coords
[2] = lp_build_emit_llvm_ternary(bld_base
, TGSI_OPCODE_MAD
,
1100 coords_arg
[3], lp_build_const_float(gallivm
, 8.0), coords
[2]);
1103 memcpy(coords_arg
, coords
, sizeof(coords
));