#include "libresoc_llvm.h"
+#include "libresoc_shader_args.h"
+#include "libresoc_llvm_build.h"
#include <llvm-c/OrcBindings.h>
#include <llvm-c/Core.h>
#include <llvm-c/Analysis.h>
#include "nir/nir.h"
#include "nir/nir_deref.h"
+#include <float.h>
+
/* Per-shader translation state for lowering a NIR shader to LLVM IR.
 * Wraps the shared LLVM context/types plus NIR -> LLVM bookkeeping. */
struct libresoc_nir_tran_ctx {
   struct libresoc_llvm_context lc;   /* LLVM context, builder and cached types/constants */
   gl_shader_stage stage;             /* shader stage being translated */
   shader_info *info;

   struct shader_args args;           /* ABI argument layout for the entry point */
   LLVMValueRef *ssa_defs;            /* NIR ssa index -> LLVM value */

   LLVMValueRef scratch;              /* alloca backing shader scratch bytes (see setup_scratch) */
   LLVMValueRef constant_data;        /* global holding shader->constant_data */

   struct hash_table *defs;
   struct hash_table *phis;           /* nir_phi_instr -> LLVM phi, for later incoming-edge fixup */
   struct hash_table *vars;
   struct hash_table *verified_interp;

   LLVMValueRef main_function;
   LLVMBasicBlockRef continue_block;  /* current loop's continue target */
   LLVMBasicBlockRef break_block;     /* current loop's break target */

   int num_locals;                    /* vec4 slots reserved for function_temp variables */
   LLVMValueRef *locals;              /* 4 f32 allocas per slot (see setup_locals) */
};
void InitLLVM(struct libresoc_llvm *llvm_ref)
{
//assert(tm_ref);
LLVMDisposeErrorMessage(def_triple);
llvm_ref->orc_ref = LLVMOrcCreateInstance(tm_ref);
- llvm_ref->context = LLVMContextCreate();
- llvm_ref->builder = LLVMCreateBuilderInContext(llvm_ref->context);
+ llvm_ref->lc.context = LLVMContextCreate();
+ llvm_ref->lc.builder = LLVMCreateBuilderInContext(llvm_ref->lc.context);
+ llvm_ref->lc.voidt = LLVMVoidTypeInContext(llvm_ref->lc.context);
+ llvm_ref->lc.i1 = LLVMInt1TypeInContext(llvm_ref->lc.context);
+ llvm_ref->lc.i8 = LLVMInt8TypeInContext(llvm_ref->lc.context);
+ llvm_ref->lc.i16 = LLVMIntTypeInContext(llvm_ref->lc.context, 16);
+ llvm_ref->lc.i32 = LLVMIntTypeInContext(llvm_ref->lc.context, 32);
+ llvm_ref->lc.i64 = LLVMIntTypeInContext(llvm_ref->lc.context, 64);
+ llvm_ref->lc.i128 = LLVMIntTypeInContext(llvm_ref->lc.context, 128);
+ llvm_ref->lc.intptr = llvm_ref->lc.i32;
+ llvm_ref->lc.f16 = LLVMHalfTypeInContext(llvm_ref->lc.context);
+ llvm_ref->lc.f32 = LLVMFloatTypeInContext(llvm_ref->lc.context);
+ llvm_ref->lc.f64 = LLVMDoubleTypeInContext(llvm_ref->lc.context);
+ llvm_ref->lc.v2i16 = LLVMVectorType(llvm_ref->lc.i16, 2);
+ llvm_ref->lc.v4i16 = LLVMVectorType(llvm_ref->lc.i16, 4);
+ llvm_ref->lc.v2f16 = LLVMVectorType(llvm_ref->lc.f16, 2);
+ llvm_ref->lc.v4f16 = LLVMVectorType(llvm_ref->lc.f16, 4);
+ llvm_ref->lc.v2i32 = LLVMVectorType(llvm_ref->lc.i32, 2);
+ llvm_ref->lc.v3i32 = LLVMVectorType(llvm_ref->lc.i32, 3);
+ llvm_ref->lc.v4i32 = LLVMVectorType(llvm_ref->lc.i32, 4);
+ llvm_ref->lc.v2f32 = LLVMVectorType(llvm_ref->lc.f32, 2);
+ llvm_ref->lc.v3f32 = LLVMVectorType(llvm_ref->lc.f32, 3);
+ llvm_ref->lc.v4f32 = LLVMVectorType(llvm_ref->lc.f32, 4);
+ llvm_ref->lc.v8i32 = LLVMVectorType(llvm_ref->lc.i32, 8);
+ // llvm_ref->lc.iN_wavemask = LLVMIntTypeInContext(llvm_ref->lc.context, llvm_ref->lc.wave_size);
+ // llvm_ref->lc.iN_ballotmask = LLVMIntTypeInContext(llvm_ref->lc.context, ballot_mask_bits);
+
+ llvm_ref->lc.i8_0 = LLVMConstInt(llvm_ref->lc.i8, 0, false);
+ llvm_ref->lc.i8_1 = LLVMConstInt(llvm_ref->lc.i8, 1, false);
+ llvm_ref->lc.i16_0 = LLVMConstInt(llvm_ref->lc.i16, 0, false);
+ llvm_ref->lc.i16_1 = LLVMConstInt(llvm_ref->lc.i16, 1, false);
+ llvm_ref->lc.i32_0 = LLVMConstInt(llvm_ref->lc.i32, 0, false);
+ llvm_ref->lc.i32_1 = LLVMConstInt(llvm_ref->lc.i32, 1, false);
+ llvm_ref->lc.i64_0 = LLVMConstInt(llvm_ref->lc.i64, 0, false);
+ llvm_ref->lc.i64_1 = LLVMConstInt(llvm_ref->lc.i64, 1, false);
+ llvm_ref->lc.i128_0 = LLVMConstInt(llvm_ref->lc.i128, 0, false);
+ llvm_ref->lc.i128_1 = LLVMConstInt(llvm_ref->lc.i128, 1, false);
+ llvm_ref->lc.f16_0 = LLVMConstReal(llvm_ref->lc.f16, 0.0);
+ llvm_ref->lc.f16_1 = LLVMConstReal(llvm_ref->lc.f16, 1.0);
+ llvm_ref->lc.f32_0 = LLVMConstReal(llvm_ref->lc.f32, 0.0);
+ llvm_ref->lc.f32_1 = LLVMConstReal(llvm_ref->lc.f32, 1.0);
+ llvm_ref->lc.f64_0 = LLVMConstReal(llvm_ref->lc.f64, 0.0);
+ llvm_ref->lc.f64_1 = LLVMConstReal(llvm_ref->lc.f64, 1.0);
+
+ llvm_ref->lc.i1false = LLVMConstInt(llvm_ref->lc.i1, 0, false);
+ llvm_ref->lc.i1true = LLVMConstInt(llvm_ref->lc.i1, 1, false);
+ llvm_ref->lc.float_mode = 0; //TODO: default value, when required take this value as parameter
}
void DestroyLLVM(struct libresoc_llvm *llvm_ref)
return (uint64_t)address;
}
-void libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_shader *nir)
+static LLVMTypeRef arg_llvm_type(enum arg_type type, unsigned size, struct libresoc_llvm_context *ctx)
+{
+ if (type == ARG_FLOAT) {
+ return size == 1 ? ctx->f32 : LLVMVectorType(ctx->f32, size);
+ } else if (type == ARG_INT) {
+ return size == 1 ? ctx->i32 : LLVMVectorType(ctx->i32, size);
+ } else {
+ LLVMTypeRef ptr_type;
+ switch (type) {
+ case ARG_CONST_PTR:
+ ptr_type = ctx->i8;
+ break;
+ case ARG_CONST_FLOAT_PTR:
+ ptr_type = ctx->f32;
+ break;
+ case ARG_CONST_PTR_PTR:
+ ptr_type = LLVMPointerType(ctx->i8, 0);
+ break;
+ case ARG_CONST_DESC_PTR:
+ ptr_type = ctx->v4i32;
+ break;
+ case ARG_CONST_IMAGE_PTR:
+ ptr_type = ctx->v8i32;
+ break;
+ default:
+ unreachable("unknown arg type");
+ }
+ if (size == 1) {
+ //return ac_array_in_const32_addr_space(ptr_type);
+ return LLVMPointerType(ptr_type, 0); //address space may be wrong
+ } else {
+ assert(size == 2);
+ return LLVMPointerType(ptr_type, 0);
+ }
+ }
+}
+static LLVMValueRef get_src(struct libresoc_nir_tran_ctx *ctx, nir_src src)
+{
+ assert(src.is_ssa);
+ // printf("index %d\n", src.ssa->index);
+ return ctx->ssa_defs[src.ssa->index];
+}
+
/* Expand each set bit i of `mask` into a run of `multiplier` set bits
 * starting at bit i*multiplier (e.g. widening a per-component writemask
 * into a per-dword mask). */
static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
{
   const uint32_t run = (1u << multiplier) - 1u;
   uint32_t out = 0;

   for (unsigned bit = 0; bit < 32; ++bit) {
      if ((1u << bit) > mask)
         break;
      if (mask & (1u << bit))
         out |= run << (bit * multiplier);
   }
   return out;
}
+
/* Compute the offset of `instr`'s destination relative to the start of
 * the underlying variable, split into a compile-time part (*const_out,
 * counted in attribute slots) and an optional run-time part (*indir_out,
 * an i32 value or NULL when the offset is fully constant).
 *
 * If the caller wants a vertex index (per-vertex I/O), the first array
 * level of the deref path is consumed as that index, either as an LLVM
 * value (*vertex_index_ref) or as a constant (*vertex_index_out). */
static void get_deref_offset(struct libresoc_nir_tran_ctx *ctx, nir_deref_instr *instr, bool vs_in,
                             unsigned *vertex_index_out, LLVMValueRef *vertex_index_ref,
                             unsigned *const_out, LLVMValueRef *indir_out)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);
   nir_deref_path path;
   unsigned idx_lvl = 1;   /* path.path[0] is the deref_var itself */

   nir_deref_path_init(&path, instr, NULL);

   if (vertex_index_out != NULL || vertex_index_ref != NULL) {
      if (vertex_index_ref) {
         *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index);
         if (vertex_index_out)
            *vertex_index_out = 0;
      } else {
         *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
      }
      ++idx_lvl;
   }

   uint32_t const_offset = 0;
   LLVMValueRef offset = NULL;

   /* Compact variables are a flat array; the index must be constant. */
   if (var->data.compact) {
      assert(instr->deref_type == nir_deref_type_array);
      const_offset = nir_src_as_uint(instr->arr.index);
      goto out;
   }

   /* Walk the remaining path levels, accumulating constant slots into
    * const_offset and dynamic terms into `offset`. */
   for (; path.path[idx_lvl]; ++idx_lvl) {
      const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
      if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
         unsigned index = path.path[idx_lvl]->strct.index;

         /* Sum the slot counts of all fields preceding this one. */
         for (unsigned i = 0; i < index; i++) {
            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
            const_offset += glsl_count_attribute_slots(ft, vs_in);
         }
      } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
            const_offset += size * nir_src_as_uint(path.path[idx_lvl]->arr.index);
         } else {
            LLVMValueRef array_off =
               LLVMBuildMul(ctx->lc.builder, LLVMConstInt(ctx->lc.i32, size, 0),
                            get_src(ctx, path.path[idx_lvl]->arr.index), "");
            if (offset)
               offset = LLVMBuildAdd(ctx->lc.builder, offset, array_off, "");
            else
               offset = array_off;
         }
      } else
         unreachable("Uhandled deref type in get_deref_instr_offset");
   }

out:
   nir_deref_path_finish(&path);

   /* NOTE(review): when both parts are non-zero, the constant part is
    * folded into the indirect offset AND still reported via *const_out;
    * callers appear to consume one or the other — confirm before using
    * both together. */
   if (const_offset && offset)
      offset =
         LLVMBuildAdd(ctx->lc.builder, offset, LLVMConstInt(ctx->lc.i32, const_offset, 0), "");

   *const_out = const_offset;
   *indir_out = offset;
}
+
/* Size in bytes of one scalar element of `type`; booleans are stored as
 * 32-bit values. */
static unsigned type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type));
   if (glsl_type_is_boolean(type))
      return 4;
   return glsl_get_bit_size(type) / 8;
}
+
+
+static LLVMValueRef emit_int_cmp(struct libresoc_llvm_context *lc, LLVMIntPredicate pred,
+ LLVMValueRef src0, LLVMValueRef src1)
+{
+ LLVMTypeRef src0_type = LLVMTypeOf(src0);
+ LLVMTypeRef src1_type = LLVMTypeOf(src1);
+
+ if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
+ src1 = LLVMBuildIntToPtr(lc->builder, src1, src0_type, "");
+ } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) {
+ src0 = LLVMBuildIntToPtr(lc->builder, src0, src1_type, "");
+ }
+
+ LLVMValueRef result = LLVMBuildICmp(lc->builder, pred, src0, src1, "");
+ return LLVMBuildSelect(lc->builder, result, LLVMConstInt(lc->i32, 0xFFFFFFFF, false),
+ lc->i32_0, "");
+}
+
+static LLVMValueRef emit_float_cmp(struct libresoc_llvm_context *lc, LLVMRealPredicate pred,
+ LLVMValueRef src0, LLVMValueRef src1)
+{
+ LLVMValueRef result;
+ src0 = to_float(lc, src0);
+ src1 = to_float(lc, src1);
+ result = LLVMBuildFCmp(lc->builder, pred, src0, src1, "");
+ return LLVMBuildSelect(lc->builder, result, LLVMConstInt(lc->i32, 0xFFFFFFFF, false),
+ lc->i32_0, "");
+}
+
+static LLVMValueRef emit_intrin_1f_param(struct libresoc_llvm_context *lc, const char *intrin,
+ LLVMTypeRef result_type, LLVMValueRef src0)
+{
+ char name[64], type[64];
+ LLVMValueRef params[] = {
+ to_float(lc, src0),
+ };
+
+ build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
+ assert(length < sizeof(name));
+ return build_intrinsic(lc, name, result_type, params, 1, FUNC_ATTR_READNONE);
+}
+
+static LLVMValueRef emit_intrin_1f_param_scalar(struct libresoc_llvm_context *lc, const char *intrin,
+ LLVMTypeRef result_type, LLVMValueRef src0)
+{
+ if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
+ return emit_intrin_1f_param(lc, intrin, result_type, src0);
+
+ LLVMTypeRef elem_type = LLVMGetElementType(result_type);
+ LLVMValueRef ret = LLVMGetUndef(result_type);
+
+ /* Scalarize the intrinsic, because vectors are not supported. */
+ for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
+ char name[64], type[64];
+ LLVMValueRef params[] = {
+ to_float(lc, llvm_extract_elem(lc, src0, i)),
+ };
+
+ build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
+ assert(length < sizeof(name));
+ ret = LLVMBuildInsertElement(
+ lc->builder, ret,
+ build_intrinsic(lc, name, elem_type, params, 1, FUNC_ATTR_READNONE),
+ LLVMConstInt(lc->i32, i, 0), "");
+ }
+ return ret;
+}
+
+static LLVMValueRef emit_intrin_2f_param(struct libresoc_llvm_context *ctx, const char *intrin,
+ LLVMTypeRef result_type, LLVMValueRef src0,
+ LLVMValueRef src1)
+{
+ char name[64], type[64];
+ LLVMValueRef params[] = {
+ to_float(ctx, src0),
+ to_float(ctx, src1),
+ };
+
+ build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
+ assert(length < sizeof(name));
+ return build_intrinsic(ctx, name, result_type, params, 2, FUNC_ATTR_READNONE);
+}
+
+static LLVMValueRef emit_intrin_3f_param(struct libresoc_llvm_context *ctx, const char *intrin,
+ LLVMTypeRef result_type, LLVMValueRef src0,
+ LLVMValueRef src1, LLVMValueRef src2)
+{
+ char name[64], type[64];
+ LLVMValueRef params[] = {
+ to_float(ctx, src0),
+ to_float(ctx, src1),
+ to_float(ctx, src2),
+ };
+
+ build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
+ assert(length < sizeof(name));
+ return build_intrinsic(ctx, name, result_type, params, 3, FUNC_ATTR_READNONE);
+}
+
+static LLVMValueRef emit_bcsel(struct libresoc_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1,
+ LLVMValueRef src2)
+{
+ LLVMTypeRef src1_type = LLVMTypeOf(src1);
+ LLVMTypeRef src2_type = LLVMTypeOf(src2);
+
+ if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
+ src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
+ } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
+ src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
+ }
+
+ LLVMValueRef v =
+ LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, LLVMConstNull(LLVMTypeOf(src0)), "");
+ return LLVMBuildSelect(ctx->builder, v, to_integer_or_pointer(ctx, src1),
+ to_integer_or_pointer(ctx, src2), "");
+}
+
+static LLVMValueRef emit_iabs(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
+{
+ return build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
+}
+
+static LLVMValueRef emit_uint_carry(struct libresoc_llvm_context *ctx, const char *intrin,
+ LLVMValueRef src0, LLVMValueRef src1)
+{
+ LLVMTypeRef ret_type;
+ LLVMTypeRef types[] = {ctx->i32, ctx->i1};
+ LLVMValueRef res;
+ LLVMValueRef params[] = {src0, src1};
+ ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true);
+
+ res = build_intrinsic(ctx, intrin, ret_type, params, 2, FUNC_ATTR_READNONE);
+
+ res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
+ res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
+ return res;
+}
+
+static LLVMValueRef emit_b2f(struct libresoc_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
+{
+ assert(get_elem_bits(ctx, LLVMTypeOf(src0)) == 32);
+ LLVMValueRef result =
+ LLVMBuildAnd(ctx->builder, src0, const_uint_vec(ctx, LLVMTypeOf(src0), 0x3f800000), "");
+ result = to_float(ctx, result);
+
+ switch (bitsize) {
+ case 16: {
+ bool vec2 = LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind;
+ return LLVMBuildFPTrunc(ctx->builder, result, vec2 ? ctx->v2f16 : ctx->f16, "");
+ }
+ case 32:
+ return result;
+ case 64:
+ return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
+ default:
+ unreachable("Unsupported bit size.");
+ }
+}
+
+static LLVMValueRef emit_f2b(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
+{
+ src0 = to_float(ctx, src0);
+ LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
+ return LLVMBuildSExt(ctx->builder, LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""),
+ ctx->i32, "");
+}
+
+static LLVMValueRef emit_b2i(struct libresoc_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
{
- LLVMModuleRef mod = LLVMModuleCreateWithName("libresoc_mod");
- LLVMTypeRef param_types[] = { LLVMInt32Type(), LLVMInt32Type() };
- LLVMTypeRef ret_type = LLVMFunctionType(LLVMInt32Type(), param_types, 2, 0);
- LLVMValueRef sum = LLVMAddFunction(mod, "sum", ret_type);
- LLVMBasicBlockRef entry = LLVMAppendBasicBlock(sum, "entry");
- LLVMBuilderRef builder = LLVMCreateBuilder();
- LLVMPositionBuilderAtEnd(builder, entry);
- LLVMValueRef tmp = LLVMBuildAdd(builder, LLVMGetParam(sum, 0), LLVMGetParam(sum, 1), "tmp");
- LLVMBuildRet(builder, tmp);
+ LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
+
+ switch (bitsize) {
+ case 8:
+ return LLVMBuildTrunc(ctx->builder, result, ctx->i8, "");
+ case 16:
+ return LLVMBuildTrunc(ctx->builder, result, ctx->i16, "");
+ case 32:
+ return result;
+ case 64:
+ return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
+ default:
+ unreachable("Unsupported bit size.");
+ }
+}
+
+static LLVMValueRef emit_i2b(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
+{
+ LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
+ return LLVMBuildSExt(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""),
+ ctx->i32, "");
+}
+
+static LLVMValueRef emit_f2f16(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
+{
+ LLVMValueRef result;
+ LLVMValueRef cond = NULL;
+
+ src0 = to_float(ctx, src0);
+ result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
+
+ /* need to convert back up to f32 */
+ result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
+ return result;
+}
+
+static LLVMValueRef emit_umul_high(struct libresoc_llvm_context *ctx, LLVMValueRef src0,
+ LLVMValueRef src1)
+{
+ LLVMValueRef dst64, result;
+ src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
+ src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
+
+ dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
+ dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
+ result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
+ return result;
+}
+
+static LLVMValueRef emit_imul_high(struct libresoc_llvm_context *ctx, LLVMValueRef src0,
+ LLVMValueRef src1)
+{
+ LLVMValueRef dst64, result;
+ src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
+ src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
+
+ dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
+ dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
+ result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
+ return result;
+}
+
+static LLVMValueRef emit_bfm(struct libresoc_llvm_context *ctx, LLVMValueRef bits, LLVMValueRef offset)
+{
+ /* mask = ((1 << bits) - 1) << offset */
+ return LLVMBuildShl(
+ ctx->builder,
+ LLVMBuildSub(ctx->builder, LLVMBuildShl(ctx->builder, ctx->i32_1, bits, ""), ctx->i32_1, ""),
+ offset, "");
+}
+
+static LLVMValueRef emit_bitfield_select(struct libresoc_llvm_context *ctx, LLVMValueRef mask,
+ LLVMValueRef insert, LLVMValueRef base)
+{
+ /* Calculate:
+ * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
+ * Use the right-hand side, which the LLVM backend can convert to V_BFI.
+ */
+ return LLVMBuildXor(
+ ctx->builder, base,
+ LLVMBuildAnd(ctx->builder, mask, LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
+}
+
+static LLVMValueRef emit_pack_2x16(struct libresoc_llvm_context *ctx, LLVMValueRef src0,
+ LLVMValueRef (*pack)(struct libresoc_llvm_context *ctx,
+ LLVMValueRef args[2]))
+{
+ LLVMValueRef comp[2];
+
+ src0 = to_float(ctx, src0);
+ comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
+ comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
+
+ return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, "");
+}
+
+static LLVMValueRef emit_unpack_half_2x16(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
+{
+ LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
+ LLVMValueRef temps[2], val;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
+ val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
+ val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
+ temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
+ }
+ return build_gather_values(ctx, temps, 2);
+}
+
// TODO: enable this when ac_build_ddxy() is added
+// static LLVMValueRef emit_ddxy(struct libresoc_nir_context *ctx, nir_op op, LLVMValueRef src0)
+// {
+// unsigned mask;
+// int idx;
+// LLVMValueRef result;
+
+// if (op == nir_op_fddx_fine)
+// mask = TID_MASK_LEFT;
+// else if (op == nir_op_fddy_fine)
+// mask = TID_MASK_TOP;
+// else
+// mask = TID_MASK_TOP_LEFT;
+
+// /* for DDX we want to next X pixel, DDY next Y pixel. */
+// if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx)
+// idx = 1;
+// else
+// idx = 2;
+
+// result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
+// return result;
+// }
+
+static void setup_locals(struct libresoc_nir_tran_ctx *ctx, struct nir_function *func)
+{
+ int i, j;
+ ctx->num_locals = 0;
+ nir_foreach_function_temp_variable(variable, func->impl)
+ {
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+ variable->data.driver_location = ctx->num_locals * 4;
+ variable->data.location_frac = 0;
+ ctx->num_locals += attrib_count;
+ }
+ ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
+ if (!ctx->locals)
+ return;
+
+ for (i = 0; i < ctx->num_locals; i++) {
+ for (j = 0; j < 4; j++) {
+ ctx->locals[i * 4 + j] = build_alloca_undef(&ctx->lc, ctx->lc.f32, "temp");
+ }
+ }
+}
+
+static void setup_scratch(struct libresoc_nir_tran_ctx *ctx, struct nir_shader *shader)
+{
+ if (shader->scratch_size == 0)
+ return;
+
+ ctx->scratch =
+ build_alloca_undef(&ctx->lc, LLVMArrayType(ctx->lc.i8, shader->scratch_size), "scratch");
+}
+
/* Upload shader->constant_data as a hidden, constant LLVM global so
 * loads of constant-initialized data can be redirected to it. No-op
 * when the shader carries no constant data. */
static void setup_constant_data(struct libresoc_nir_tran_ctx *ctx, struct nir_shader *shader)
{
   if (!shader->constant_data)
      return;

   /* DontNullTerminate = true: the blob is raw bytes, not a C string. */
   LLVMValueRef data = LLVMConstStringInContext(ctx->lc.context, shader->constant_data,
                                                shader->constant_data_size, true);
   LLVMTypeRef type = LLVMArrayType(ctx->lc.i8, shader->constant_data_size);

   unsigned address_space = 0; //TODO: placeholder until address spaces are defined
   LLVMValueRef global =
      LLVMAddGlobalInAddressSpace(*(ctx->lc.module), type, "const_data", address_space);

   LLVMSetInitializer(global, data);
   LLVMSetGlobalConstant(global, true);
   LLVMSetVisibility(global, LLVMHiddenVisibility);
   ctx->constant_data = global;
}
+
+static LLVMTypeRef glsl_base_to_llvm_type(struct libresoc_llvm_context *lc, enum glsl_base_type type)
+{
+ switch (type) {
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_SUBROUTINE:
+ return lc->i32;
+ case GLSL_TYPE_INT8:
+ case GLSL_TYPE_UINT8:
+ return lc->i8;
+ case GLSL_TYPE_INT16:
+ case GLSL_TYPE_UINT16:
+ return lc->i16;
+ case GLSL_TYPE_FLOAT:
+ return lc->f32;
+ case GLSL_TYPE_FLOAT16:
+ return lc->f16;
+ case GLSL_TYPE_INT64:
+ case GLSL_TYPE_UINT64:
+ return lc->i64;
+ case GLSL_TYPE_DOUBLE:
+ return lc->f64;
+ default:
+ unreachable("unknown GLSL type");
+ }
+}
+
+static LLVMTypeRef glsl_to_llvm_type(struct libresoc_llvm_context *lc, const struct glsl_type *type)
+{
+ if (glsl_type_is_scalar(type)) {
+ return glsl_base_to_llvm_type(lc, glsl_get_base_type(type));
+ }
+
+ if (glsl_type_is_vector(type)) {
+ return LLVMVectorType(glsl_base_to_llvm_type(lc, glsl_get_base_type(type)),
+ glsl_get_vector_elements(type));
+ }
+
+ if (glsl_type_is_matrix(type)) {
+ return LLVMArrayType(glsl_to_llvm_type(lc, glsl_get_column_type(type)),
+ glsl_get_matrix_columns(type));
+ }
+
+ if (glsl_type_is_array(type)) {
+ return LLVMArrayType(glsl_to_llvm_type(lc, glsl_get_array_element(type)),
+ glsl_get_length(type));
+ }
+
+ assert(glsl_type_is_struct_or_ifc(type));
+
+ LLVMTypeRef member_types[glsl_get_length(type)];
+
+ for (unsigned i = 0; i < glsl_get_length(type); i++) {
+ member_types[i] = glsl_to_llvm_type(lc, glsl_get_struct_field(type, i));
+ }
+
+ return LLVMStructTypeInContext(lc->context, member_types, glsl_get_length(type), false);
+}
+
+static void visit_load_const(struct libresoc_nir_tran_ctx *ctx, const nir_load_const_instr *instr)
+{
+ LLVMValueRef values[4], value = NULL;
+ LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->lc.context, instr->def.bit_size);
+
+ for (unsigned i = 0; i < instr->def.num_components; ++i) {
+ switch (instr->def.bit_size) {
+ case 8:
+ values[i] = LLVMConstInt(element_type, instr->value[i].u8, false);
+ break;
+ case 16:
+ values[i] = LLVMConstInt(element_type, instr->value[i].u16, false);
+ break;
+ case 32:
+ values[i] = LLVMConstInt(element_type, instr->value[i].u32, false);
+ break;
+ case 64:
+ values[i] = LLVMConstInt(element_type, instr->value[i].u64, false);
+ break;
+ default:
+ fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size);
+ abort();
+ }
+ }
+ if (instr->def.num_components > 1) {
+ value = LLVMConstVector(values, instr->def.num_components);
+ } else
+ value = values[0];
+
+ ctx->ssa_defs[instr->def.index] = value;
+}
+
/* Translate a NIR deref instruction for shared/global memory into an
 * LLVM pointer value stored in ssa_defs. Derefs of other variable modes
 * are skipped here and handled at their load/store sites. */
static void visit_deref(struct libresoc_nir_tran_ctx *ctx, nir_deref_instr *instr)
{
   if (instr->mode != nir_var_mem_shared && instr->mode != nir_var_mem_global)
      return;

   LLVMValueRef result = NULL;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      /* Base case: look up the LLVM value registered for the variable. */
      struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
      result = entry->data;
      break;
   }
   case nir_deref_type_struct:
      if (instr->mode == nir_var_mem_global) {
         /* Global memory: index by the field's explicit byte offset. */
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index);
         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent),
                                LLVMConstInt(ctx->lc.i32, offset, 0));
      } else {
         /* Shared memory is a real LLVM struct: index by field number. */
         result = build_gep0(&ctx->lc, get_src(ctx, instr->parent),
                             LLVMConstInt(ctx->lc.i32, instr->strct.index, 0));
      }
      break;
   case nir_deref_type_array:
      if (instr->mode == nir_var_mem_global) {
         /* Global memory: scale the index by the element stride in bytes. */
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         unsigned stride = glsl_get_explicit_stride(parent->type);

         /* Row-major matrices and tightly packed vectors carry no
          * explicit stride; fall back to the scalar size. */
         if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) ||
             (glsl_type_is_vector(parent->type) && stride == 0))
            stride = type_scalar_size_bytes(parent->type);

         assert(stride > 0);
         LLVMValueRef index = get_src(ctx, instr->arr.index);
         if (LLVMTypeOf(index) != ctx->lc.i64)
            index = LLVMBuildZExt(ctx->lc.builder, index, ctx->lc.i64, "");

         LLVMValueRef offset =
            LLVMBuildMul(ctx->lc.builder, index, LLVMConstInt(ctx->lc.i64, stride, 0), "");

         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), offset);
      } else {
         /* Shared memory: plain typed GEP into the array. */
         result =
            build_gep0(&ctx->lc, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      }
      break;
   case nir_deref_type_ptr_as_array:
      if (instr->mode == nir_var_mem_global) {
         /* As the array case, but the stride comes from the deref itself. */
         unsigned stride = nir_deref_instr_array_stride(instr);

         LLVMValueRef index = get_src(ctx, instr->arr.index);
         if (LLVMTypeOf(index) != ctx->lc.i64)
            index = LLVMBuildZExt(ctx->lc.builder, index, ctx->lc.i64, "");

         LLVMValueRef offset =
            LLVMBuildMul(ctx->lc.builder, index, LLVMConstInt(ctx->lc.i64, stride, 0), "");

         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), offset);
      } else {
         result =
            build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      }
      break;
   case nir_deref_type_cast: {
      result = get_src(ctx, instr->parent);

      /* We can't use the structs from LLVM because the shader
       * specifies its own offsets. */
      LLVMTypeRef pointee_type = ctx->lc.i8;
      if (instr->mode == nir_var_mem_shared)
         pointee_type = glsl_to_llvm_type(&ctx->lc, instr->type);

      unsigned address_space;

      switch (instr->mode) {
      case nir_var_mem_shared:
         address_space = 1;
         break;
      case nir_var_mem_global:
         address_space = 0;
         break;
      default:
         unreachable("Unhandled address space");
      }

      LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);

      /* Coerce the parent value into the expected pointer type: vectors
       * of pointer-sized ints bitcast, plain ints go through inttoptr. */
      if (LLVMTypeOf(result) != type) {
         if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
            result = LLVMBuildBitCast(ctx->lc.builder, result, type, "");
         } else {
            result = LLVMBuildIntToPtr(ctx->lc.builder, result, type, "");
         }
      }
      break;
   }
   default:
      unreachable("Unhandled deref_instr deref type");
   }

   ctx->ssa_defs[instr->dest.ssa.index] = result;
}
+
+static LLVMTypeRef get_def_type(struct libresoc_nir_tran_ctx *ctx, const nir_ssa_def *def)
+{
+ LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, def->bit_size);
+ if (def->num_components > 1) {
+ type = LLVMVectorType(type, def->num_components);
+ }
+ return type;
+}
+
+static void visit_phi(struct libresoc_nir_tran_ctx *ctx, nir_phi_instr *instr)
+{
+ LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
+ LLVMValueRef result = LLVMBuildPhi(ctx->lc.builder, type, "");
+
+ ctx->ssa_defs[instr->dest.ssa.index] = result;
+ _mesa_hash_table_insert(ctx->phis, instr, result);
+}
+
+static bool is_def_used_in_an_export(const nir_ssa_def *def)
+{
+ nir_foreach_use (use_src, def) {
+ if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
+ if (instr->intrinsic == nir_intrinsic_store_deref)
+ return true;
+ } else if (use_src->parent_instr->type == nir_instr_type_alu) {
+ nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
+ if (instr->op == nir_op_vec4 && is_def_used_in_an_export(&instr->dest.dest.ssa)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static void visit_ssa_undef(struct libresoc_nir_tran_ctx *ctx, const nir_ssa_undef_instr *instr)
+{
+ unsigned num_components = instr->def.num_components;
+ LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, instr->def.bit_size);
+
+ if (/*!ctx->abi->convert_undef_to_zero ||*/ is_def_used_in_an_export(&instr->def)) {
+ LLVMValueRef undef;
+
+ if (num_components == 1)
+ undef = LLVMGetUndef(type);
+ else {
+ undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ }
+ ctx->ssa_defs[instr->def.index] = undef;
+ } else {
+ LLVMValueRef zero = LLVMConstInt(type, 0, false);
+ if (num_components > 1) {
+ zero = build_gather_values_extended(&ctx->lc, &zero, 4, 0, false, false);
+ }
+ ctx->ssa_defs[instr->def.index] = zero;
+ }
+}
+
+static void visit_jump(struct libresoc_llvm_context *lc, const nir_jump_instr *instr)
+{
+ switch (instr->type) {
+ case nir_jump_break:
+ build_break(lc);
+ break;
+ case nir_jump_continue:
+ build_continue(lc);
+ break;
+ default:
+ fprintf(stderr, "Unknown NIR jump instr: ");
+ nir_print_instr(&instr->instr, stderr);
+ fprintf(stderr, "\n");
+ abort();
+ }
+}
+
/* Fetch ALU source `src` as an LLVM value with `num_components`
 * components, applying the NIR swizzle. Source modifiers (negate/abs)
 * are not supported here and are asserted away. */
static LLVMValueRef get_alu_src(struct libresoc_nir_tran_ctx *ctx, nir_alu_src src,
                                unsigned num_components)
{
   LLVMValueRef value = get_src(ctx, src.src);
   bool need_swizzle = false;

   assert(value);
   unsigned src_components = get_llvm_num_components(value);
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      /* NIR swizzles address at most 4 components. */
      LLVMValueRef masks[] = {LLVMConstInt(ctx->lc.i32, src.swizzle[0], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[1], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[2], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[3], false)};

      if (src_components > 1 && num_components == 1) {
         /* Vector -> scalar: extract the single swizzled element. */
         value = LLVMBuildExtractElement(ctx->lc.builder, value, masks[0], "");
      } else if (src_components == 1 && num_components > 1) {
         /* Scalar -> vector: splat the value. */
         LLVMValueRef values[] = {value, value, value, value};
         value = build_gather_values(&ctx->lc, values, num_components);
      } else {
         /* Vector -> vector: shuffle with the swizzle as the mask. */
         LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
         value = LLVMBuildShuffleVector(ctx->lc.builder, value, value, swizzle, "");
      }
   }
   assert(!src.negate);
   assert(!src.abs);
   return value;
}
+
+/* Translate one NIR ALU instruction into LLVM IR.
+ *
+ * Sources are fetched with get_alu_src(), swizzled to 'src_components'
+ * lanes as required by the opcode.  The computed result is converted back
+ * to an integer/pointer representation and stored into ctx->ssa_defs at
+ * the destination SSA index.  Opcodes not listed fall into the default
+ * branch and abort.
+ */
+static void visit_alu(struct libresoc_nir_tran_ctx *ctx, const nir_alu_instr *instr)
+{
+ LLVMValueRef src[4], result = NULL;
+ unsigned num_components = instr->dest.dest.ssa.num_components;
+ unsigned src_components;
+ LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
+
+ assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
+ /* First pass: decide how many components each source must supply, which
+ * can differ from the destination's component count for pack/unpack and
+ * cube-map opcodes. */
+ switch (instr->op) {
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ src_components = 1;
+ break;
+ case nir_op_pack_half_2x16:
+ case nir_op_pack_snorm_2x16:
+ case nir_op_pack_unorm_2x16:
+ src_components = 2;
+ break;
+ case nir_op_unpack_half_2x16:
+ src_components = 1;
+ break;
+ case nir_op_cube_face_coord:
+ case nir_op_cube_face_index:
+ src_components = 3;
+ break;
+ default:
+ src_components = num_components;
+ break;
+ }
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ src[i] = get_alu_src(ctx, instr->src[i], src_components);
+
+ switch (instr->op) {
+ case nir_op_mov:
+ result = src[0];
+ break;
+ case nir_op_fneg:
+ src[0] = to_float(&ctx->lc, src[0]);
+ result = LLVMBuildFNeg(ctx->lc.builder, src[0], "");
+ if (ctx->lc.float_mode == FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
+ /* fneg will be optimized by backend compiler with sign
+ * bit removed via XOR. This is probably a LLVM bug.
+ */
+ result = build_canonicalize(&ctx->lc, result, instr->dest.dest.ssa.bit_size);
+ }
+ break;
+ case nir_op_ineg:
+ result = LLVMBuildNeg(ctx->lc.builder, src[0], "");
+ break;
+ case nir_op_inot:
+ result = LLVMBuildNot(ctx->lc.builder, src[0], "");
+ break;
+ case nir_op_iadd:
+ result = LLVMBuildAdd(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_fadd:
+ src[0] = to_float(&ctx->lc, src[0]);
+ src[1] = to_float(&ctx->lc, src[1]);
+ result = LLVMBuildFAdd(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_fsub:
+ src[0] = to_float(&ctx->lc, src[0]);
+ src[1] = to_float(&ctx->lc, src[1]);
+ result = LLVMBuildFSub(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_isub:
+ result = LLVMBuildSub(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_imul:
+ result = LLVMBuildMul(ctx->lc.builder, src[0], src[1], "");
+ break;
+ /* NOTE(review): imod is mapped to SRem exactly like irem below, but
+ * NIR's imod is a floored modulo (result takes the divisor's sign)
+ * while LLVM's srem truncates (result takes the dividend's sign) —
+ * the two differ for negative operands.  This is only correct if
+ * imod is lowered earlier in the NIR pipeline; confirm. */
+ case nir_op_imod:
+ result = LLVMBuildSRem(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_umod:
+ result = LLVMBuildURem(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_irem:
+ result = LLVMBuildSRem(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_idiv:
+ result = LLVMBuildSDiv(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_udiv:
+ result = LLVMBuildUDiv(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_fmul:
+ src[0] = to_float(&ctx->lc, src[0]);
+ src[1] = to_float(&ctx->lc, src[1]);
+ result = LLVMBuildFMul(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_frcp:
+ /* For doubles, we need precise division to pass GLCTS. */
+ if (ctx->lc.float_mode == FLOAT_MODE_DEFAULT_OPENGL && get_type_size(def_type) == 8) {
+ result = LLVMBuildFDiv(ctx->lc.builder, ctx->lc.f64_1, to_float(&ctx->lc, src[0]), "");
+ } else {
+ result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.rcp",
+ to_float_type(&ctx->lc, def_type), src[0]);
+ }
+ // TODO: abi not supported
+ // if (ctx->abi->clamp_div_by_zero)
+ // result = build_fmin(&ctx->lc, result,
+ // LLVMConstReal(to_float_type(&ctx->lc, def_type), FLT_MAX));
+ break;
+ case nir_op_iand:
+ result = LLVMBuildAnd(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_ior:
+ result = LLVMBuildOr(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_ixor:
+ result = LLVMBuildXor(ctx->lc.builder, src[0], src[1], "");
+ break;
+ /* For the three shift ops the shift amount is resized to match the
+ * shifted value's width, as LLVM requires both operands of a shift
+ * to have the same type. */
+ case nir_op_ishl:
+ if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
+ get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
+ else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
+ get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
+ result = LLVMBuildShl(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_ishr:
+ if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
+ get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
+ else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
+ get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
+ result = LLVMBuildAShr(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_ushr:
+ if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
+ get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
+ else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
+ get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
+ result = LLVMBuildLShr(ctx->lc.builder, src[0], src[1], "");
+ break;
+ case nir_op_ilt32:
+ result = emit_int_cmp(&ctx->lc, LLVMIntSLT, src[0], src[1]);
+ break;
+ case nir_op_ine32:
+ result = emit_int_cmp(&ctx->lc, LLVMIntNE, src[0], src[1]);
+ break;
+ case nir_op_ieq32:
+ result = emit_int_cmp(&ctx->lc, LLVMIntEQ, src[0], src[1]);
+ break;
+ case nir_op_ige32:
+ result = emit_int_cmp(&ctx->lc, LLVMIntSGE, src[0], src[1]);
+ break;
+ case nir_op_ult32:
+ result = emit_int_cmp(&ctx->lc, LLVMIntULT, src[0], src[1]);
+ break;
+ case nir_op_uge32:
+ result = emit_int_cmp(&ctx->lc, LLVMIntUGE, src[0], src[1]);
+ break;
+ case nir_op_feq32:
+ result = emit_float_cmp(&ctx->lc, LLVMRealOEQ, src[0], src[1]);
+ break;
+ /* fneu is unordered-not-equal so NaN compares as "not equal". */
+ case nir_op_fneu32:
+ result = emit_float_cmp(&ctx->lc, LLVMRealUNE, src[0], src[1]);
+ break;
+ case nir_op_flt32:
+ result = emit_float_cmp(&ctx->lc, LLVMRealOLT, src[0], src[1]);
+ break;
+ case nir_op_fge32:
+ result = emit_float_cmp(&ctx->lc, LLVMRealOGE, src[0], src[1]);
+ break;
+ case nir_op_fabs:
+ result =
+ emit_intrin_1f_param(&ctx->lc, "llvm.fabs", to_float_type(&ctx->lc, def_type), src[0]);
+ if (ctx->lc.float_mode == FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
+ /* fabs will be optimized by backend compiler with sign
+ * bit removed via AND.
+ */
+ result = build_canonicalize(&ctx->lc, result, instr->dest.dest.ssa.bit_size);
+ }
+ break;
+ case nir_op_iabs:
+ result = emit_iabs(&ctx->lc, src[0]);
+ break;
+ case nir_op_imax:
+ result = build_imax(&ctx->lc, src[0], src[1]);
+ break;
+ case nir_op_imin:
+ result = build_imin(&ctx->lc, src[0], src[1]);
+ break;
+ case nir_op_umax:
+ result = build_umax(&ctx->lc, src[0], src[1]);
+ break;
+ case nir_op_umin:
+ result = build_umin(&ctx->lc, src[0], src[1]);
+ break;
+ case nir_op_isign:
+ result = build_isign(&ctx->lc, src[0]);
+ break;
+ case nir_op_fsign:
+ src[0] = to_float(&ctx->lc, src[0]);
+ result = build_fsign(&ctx->lc, src[0]);
+ break;
+ case nir_op_ffloor:
+ result =
+ emit_intrin_1f_param(&ctx->lc, "llvm.floor", to_float_type(&ctx->lc, def_type), src[0]);
+ break;
+ case nir_op_ftrunc:
+ result =
+ emit_intrin_1f_param(&ctx->lc, "llvm.trunc", to_float_type(&ctx->lc, def_type), src[0]);
+ break;
+ case nir_op_fceil:
+ result =
+ emit_intrin_1f_param(&ctx->lc, "llvm.ceil", to_float_type(&ctx->lc, def_type), src[0]);
+ break;
+ case nir_op_fround_even:
+ result =
+ emit_intrin_1f_param(&ctx->lc, "llvm.rint", to_float_type(&ctx->lc, def_type), src[0]);
+ break;
+ case nir_op_ffract:
+ result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.fract",
+ to_float_type(&ctx->lc, def_type), src[0]);
+ break;
+ case nir_op_fsin:
+ result =
+ emit_intrin_1f_param(&ctx->lc, "llvm.sin", to_float_type(&ctx->lc, def_type), src[0]);
+ break;
+ case nir_op_fcos:
+ result =
+ emit_intrin_1f_param(&ctx->lc, "llvm.cos", to_float_type(&ctx->lc, def_type), src[0]);
+ break;
+ case nir_op_fsqrt:
+ result =
+ emit_intrin_1f_param(&ctx->lc, "llvm.sqrt", to_float_type(&ctx->lc, def_type), src[0]);
+ break;
+ case nir_op_fexp2:
+ result =
+ emit_intrin_1f_param(&ctx->lc, "llvm.exp2", to_float_type(&ctx->lc, def_type), src[0]);
+ break;
+ case nir_op_flog2:
+ result =
+ emit_intrin_1f_param(&ctx->lc, "llvm.log2", to_float_type(&ctx->lc, def_type), src[0]);
+ break;
+ case nir_op_frsq:
+ result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.rsq",
+ to_float_type(&ctx->lc, def_type), src[0]);
+ // TODO: abi not enabled
+ // if (ctx->abi->clamp_div_by_zero)
+ // result = build_fmin(&ctx->lc, result,
+ // LLVMConstReal(to_float_type(&ctx->lc, def_type), FLT_MAX));
+ break;
+ /* NOTE(review): frexp_exp/frexp_sig leave result == NULL, so no SSA
+ * def is recorded for them until the TODO helpers are added. */
+ case nir_op_frexp_exp:
+ // TODO: enable this when ac_build_frexp_exp() is added
+ // src[0] = to_float(&ctx->lc, src[0]);
+ // result = ac_build_frexp_exp(&ctx->lc, src[0], get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])));
+ // if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) == 16)
+ // result = LLVMBuildSExt(ctx->lc.builder, result, ctx->lc.i32, "");
+ break;
+ case nir_op_frexp_sig:
+ // TODO: enable this when ac_build_frexp_mant() is added
+ // src[0] = to_float(&ctx->lc, src[0]);
+ // result = ac_build_frexp_mant(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
+ // break;
+ // case nir_op_fpow:
+ // result = emit_intrin_2f_param(&ctx->lc, "llvm.pow", to_float_type(&ctx->lc, def_type),
+ // src[0], src[1]);
+ /* NOTE(review): the nir_op_fpow case label above is commented out, so
+ * fpow currently falls into the default branch and aborts. */
+ break;
+ case nir_op_fmax:
+ result = emit_intrin_2f_param(&ctx->lc, "llvm.maxnum", to_float_type(&ctx->lc, def_type),
+ src[0], src[1]);
+ break;
+ case nir_op_fmin:
+ result = emit_intrin_2f_param(&ctx->lc, "llvm.minnum", to_float_type(&ctx->lc, def_type),
+ src[0], src[1]);
+ break;
+ case nir_op_ffma:
+ result =
+ emit_intrin_3f_param(&ctx->lc, "llvm.fmuladd",
+ to_float_type(&ctx->lc, def_type), src[0], src[1], src[2]);
+ break;
+ case nir_op_ldexp:
+ src[0] = to_float(&ctx->lc, src[0]);
+ if (get_elem_bits(&ctx->lc, def_type) == 32)
+ result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f32", ctx->lc.f32, src, 2,
+ FUNC_ATTR_READNONE);
+ else if (get_elem_bits(&ctx->lc, def_type) == 16)
+ result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f16", ctx->lc.f16, src, 2,
+ FUNC_ATTR_READNONE);
+ else
+ result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f64", ctx->lc.f64, src, 2,
+ FUNC_ATTR_READNONE);
+ break;
+ case nir_op_bfm:
+ result = emit_bfm(&ctx->lc, src[0], src[1]);
+ break;
+ case nir_op_bitfield_select:
+ result = emit_bitfield_select(&ctx->lc, src[0], src[1], src[2]);
+ break;
+ case nir_op_ubfe:
+ result = build_bfe(&ctx->lc, src[0], src[1], src[2], false);
+ break;
+ case nir_op_ibfe:
+ result = build_bfe(&ctx->lc, src[0], src[1], src[2], true);
+ break;
+ case nir_op_bitfield_reverse:
+ result = build_bitfield_reverse(&ctx->lc, src[0]);
+ break;
+ case nir_op_bit_count:
+ result = build_bit_count(&ctx->lc, src[0]);
+ break;
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ src[i] = to_integer(&ctx->lc, src[i]);
+ result = build_gather_values(&ctx->lc, src, num_components);
+ break;
+ case nir_op_f2i8:
+ case nir_op_f2i16:
+ case nir_op_f2i32:
+ case nir_op_f2i64:
+ src[0] = to_float(&ctx->lc, src[0]);
+ result = LLVMBuildFPToSI(ctx->lc.builder, src[0], def_type, "");
+ break;
+ case nir_op_f2u8:
+ case nir_op_f2u16:
+ case nir_op_f2u32:
+ case nir_op_f2u64:
+ src[0] = to_float(&ctx->lc, src[0]);
+ result = LLVMBuildFPToUI(ctx->lc.builder, src[0], def_type, "");
+ break;
+ case nir_op_i2f16:
+ case nir_op_i2f32:
+ case nir_op_i2f64:
+ result = LLVMBuildSIToFP(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
+ break;
+ case nir_op_u2f16:
+ case nir_op_u2f32:
+ case nir_op_u2f64:
+ result = LLVMBuildUIToFP(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
+ break;
+ case nir_op_f2f16_rtz:
+ case nir_op_f2f16:
+ case nir_op_f2fmp:
+ src[0] = to_float(&ctx->lc, src[0]);
+
+ /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
+ * all f32->f16 conversions have to round towards zero, because both scalar
+ * and vec2 down-conversions have to round equally.
+ */
+ if (ctx->lc.float_mode == FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
+ /* (to_float was already applied above; this second call is a no-op.) */
+ src[0] = to_float(&ctx->lc, src[0]);
+
+ if (LLVMTypeOf(src[0]) == ctx->lc.f64)
+ src[0] = LLVMBuildFPTrunc(ctx->lc.builder, src[0], ctx->lc.f32, "");
+
+ /* Fast path conversion. This only works if NIR is vectorized
+ * to vec2 16.
+ */
+ if (LLVMTypeOf(src[0]) == ctx->lc.v2f32) {
+ LLVMValueRef args[] = {
+ llvm_extract_elem(&ctx->lc, src[0], 0),
+ llvm_extract_elem(&ctx->lc, src[0], 1),
+ };
+ result = build_cvt_pkrtz_f16(&ctx->lc, args);
+ break;
+ }
+
+ assert(get_llvm_num_components(src[0]) == 1);
+ LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->lc.f32)};
+ result = build_cvt_pkrtz_f16(&ctx->lc, param);
+ result = LLVMBuildExtractElement(ctx->lc.builder, result, ctx->lc.i32_0, "");
+ } else {
+ if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
+ result =
+ LLVMBuildFPExt(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
+ else
+ result =
+ LLVMBuildFPTrunc(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
+ }
+ break;
+ case nir_op_f2f16_rtne:
+ case nir_op_f2f32:
+ case nir_op_f2f64:
+ src[0] = to_float(&ctx->lc, src[0]);
+ if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
+ result = LLVMBuildFPExt(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
+ else
+ result =
+ LLVMBuildFPTrunc(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
+ break;
+ case nir_op_u2u8:
+ case nir_op_u2u16:
+ case nir_op_u2ump:
+ case nir_op_u2u32:
+ case nir_op_u2u64:
+ if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
+ result = LLVMBuildZExt(ctx->lc.builder, src[0], def_type, "");
+ else
+ result = LLVMBuildTrunc(ctx->lc.builder, src[0], def_type, "");
+ break;
+ case nir_op_i2i8:
+ case nir_op_i2i16:
+ case nir_op_i2imp:
+ case nir_op_i2i32:
+ case nir_op_i2i64:
+ if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
+ result = LLVMBuildSExt(ctx->lc.builder, src[0], def_type, "");
+ else
+ result = LLVMBuildTrunc(ctx->lc.builder, src[0], def_type, "");
+ break;
+ case nir_op_b32csel:
+ result = emit_bcsel(&ctx->lc, src[0], src[1], src[2]);
+ break;
+ case nir_op_find_lsb:
+ result = find_lsb(&ctx->lc, ctx->lc.i32, src[0]);
+ break;
+ case nir_op_ufind_msb:
+ result = build_umsb(&ctx->lc, src[0], ctx->lc.i32);
+ break;
+ case nir_op_ifind_msb:
+ result = build_imsb(&ctx->lc, src[0], ctx->lc.i32);
+ break;
+ case nir_op_uadd_carry:
+ result = emit_uint_carry(&ctx->lc, "llvm.uadd.with.overflow.i32", src[0], src[1]);
+ break;
+ case nir_op_usub_borrow:
+ result = emit_uint_carry(&ctx->lc, "llvm.usub.with.overflow.i32", src[0], src[1]);
+ break;
+ case nir_op_b2f16:
+ case nir_op_b2f32:
+ case nir_op_b2f64:
+ result = emit_b2f(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
+ break;
+ case nir_op_f2b32:
+ result = emit_f2b(&ctx->lc, src[0]);
+ break;
+ case nir_op_b2i8:
+ case nir_op_b2i16:
+ case nir_op_b2i32:
+ case nir_op_b2i64:
+ result = emit_b2i(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
+ break;
+ case nir_op_i2b32:
+ result = emit_i2b(&ctx->lc, src[0]);
+ break;
+ case nir_op_fquantize2f16:
+ result = emit_f2f16(&ctx->lc, src[0]);
+ break;
+ case nir_op_umul_high:
+ result = emit_umul_high(&ctx->lc, src[0], src[1]);
+ break;
+ case nir_op_imul_high:
+ result = emit_imul_high(&ctx->lc, src[0], src[1]);
+ break;
+ case nir_op_pack_half_2x16:
+ result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pkrtz_f16);
+ break;
+ case nir_op_pack_snorm_2x16:
+ result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pknorm_i16);
+ break;
+ case nir_op_pack_unorm_2x16:
+ result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pknorm_u16);
+ break;
+ case nir_op_unpack_half_2x16:
+ result = emit_unpack_half_2x16(&ctx->lc, src[0]);
+ break;
+ /* Derivative opcodes are accepted but produce no value yet (result
+ * stays NULL), so no SSA def is recorded for them. */
+ case nir_op_fddx:
+ case nir_op_fddy:
+ case nir_op_fddx_fine:
+ case nir_op_fddy_fine:
+ case nir_op_fddx_coarse:
+ case nir_op_fddy_coarse:
+ // TODO: enable this when emit_ddxy() is added
+ //result = emit_ddxy(ctx, instr->op, src[0]);
+ break;
+
+ case nir_op_unpack_64_2x32_split_x: {
+ assert(get_llvm_num_components(src[0]) == 1);
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i32, "");
+ result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_0, "");
+ break;
+ }
+
+ case nir_op_unpack_64_2x32_split_y: {
+ assert(get_llvm_num_components(src[0]) == 1);
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i32, "");
+ result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_1, "");
+ break;
+ }
+
+ case nir_op_pack_64_2x32_split: {
+ LLVMValueRef tmp = build_gather_values(&ctx->lc, src, 2);
+ result = LLVMBuildBitCast(ctx->lc.builder, tmp, ctx->lc.i64, "");
+ break;
+ }
+
+ case nir_op_pack_32_2x16_split: {
+ LLVMValueRef tmp = build_gather_values(&ctx->lc, src, 2);
+ result = LLVMBuildBitCast(ctx->lc.builder, tmp, ctx->lc.i32, "");
+ break;
+ }
+
+ case nir_op_unpack_32_2x16_split_x: {
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i16, "");
+ result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_0, "");
+ break;
+ }
+
+ case nir_op_unpack_32_2x16_split_y: {
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i16, "");
+ result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_1, "");
+ break;
+ }
+
+ case nir_op_cube_face_coord: {
+ src[0] = to_float(&ctx->lc, src[0]);
+ LLVMValueRef results[2];
+ LLVMValueRef in[3];
+ for (unsigned chan = 0; chan < 3; chan++)
+ in[chan] = llvm_extract_elem(&ctx->lc, src[0], chan);
+ results[0] = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubesc", ctx->lc.f32, in, 3,
+ FUNC_ATTR_READNONE);
+ results[1] = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubetc", ctx->lc.f32, in, 3,
+ FUNC_ATTR_READNONE);
+ LLVMValueRef ma = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubema", ctx->lc.f32, in, 3,
+ FUNC_ATTR_READNONE);
+ results[0] = build_fdiv(&ctx->lc, results[0], ma);
+ results[1] = build_fdiv(&ctx->lc, results[1], ma);
+ LLVMValueRef offset = LLVMConstReal(ctx->lc.f32, 0.5);
+ results[0] = LLVMBuildFAdd(ctx->lc.builder, results[0], offset, "");
+ results[1] = LLVMBuildFAdd(ctx->lc.builder, results[1], offset, "");
+ result = build_gather_values(&ctx->lc, results, 2);
+ break;
+ }
+
+ case nir_op_cube_face_index: {
+ src[0] = to_float(&ctx->lc, src[0]);
+ LLVMValueRef in[3];
+ for (unsigned chan = 0; chan < 3; chan++)
+ in[chan] = llvm_extract_elem(&ctx->lc, src[0], chan);
+ result = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubeid", ctx->lc.f32, in, 3,
+ FUNC_ATTR_READNONE);
+ break;
+ }
+
+ default:
+ fprintf(stderr, "Unknown NIR alu instr: ");
+ nir_print_instr(&instr->instr, stderr);
+ fprintf(stderr, "\n");
+ abort();
+ }
+
+ if (result) {
+ assert(instr->dest.dest.is_ssa);
+ result = to_integer_or_pointer(&ctx->lc, result);
+ ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
+ }
+}
+
+/* Load from a NIR variable dereference and return the value bitcast to the
+ * destination SSA type.
+ *
+ * Handled modes:
+ * - nir_var_shader_in / nir_var_shader_out: TODO — the ABI plumbing is
+ * commented out, so nothing is gathered yet for these modes.
+ * - nir_var_function_temp: loads channels from the ctx->locals[] array.
+ * - nir_var_mem_global: emits LLVM loads through the source pointer,
+ * honoring an explicit GLSL stride and COHERENT/VOLATILE access flags
+ * (mapped to monotonic ordering); this path returns directly.
+ */
+static LLVMValueRef visit_load_var(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
+{
+ nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ LLVMValueRef values[8];
+ int idx = 0;
+ int ve = instr->dest.ssa.num_components;
+ unsigned comp = 0;
+ /* Initialize the deref offsets up front: they are only assigned inside
+ * the if (var) block, yet the function_temp path below reads both, so
+ * when var == NULL they were previously used uninitialized (UB). */
+ LLVMValueRef indir_index = NULL;
+ LLVMValueRef ret;
+ unsigned const_index = 0;
+ unsigned stride = 4;
+ int mode = deref->mode;
+
+ if (var) {
+ bool vs_in = ctx->stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in;
+ idx = var->data.driver_location;
+ comp = var->data.location_frac;
+ mode = var->data.mode;
+
+ get_deref_offset(ctx, deref, vs_in, NULL, NULL, &const_index, &indir_index);
+
+ if (var->data.compact) {
+ /* Compact variables pack components at stride 1; fold the
+ * component offset into the constant index. */
+ stride = 1;
+ const_index += comp;
+ comp = 0;
+ }
+ }
+
+ /* 64-bit values are handled as pairs of 32-bit components. */
+ if (instr->dest.ssa.bit_size == 64 &&
+ (deref->mode == nir_var_shader_in || deref->mode == nir_var_shader_out ||
+ deref->mode == nir_var_function_temp))
+ ve *= 2;
+
+ switch (mode) {
+ case nir_var_shader_in:
+ /* TODO: remove this after RADV switches to lowered IO */
+ // if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) {
+ // return load_tess_varyings(ctx, instr, true);
+ // }
+
+ // if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ // LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, instr->dest.ssa.bit_size);
+ // LLVMValueRef indir_index;
+ // unsigned const_index, vertex_index;
+ // get_deref_offset(ctx, deref, false, &vertex_index, NULL, &const_index, &indir_index);
+ // assert(indir_index == NULL);
+
+ // return ctx->abi->load_inputs(ctx->abi, var->data.location, var->data.driver_location,
+ // var->data.location_frac, instr->num_components, vertex_index,
+ // const_index, type);
+ // }
+
+ // for (unsigned chan = comp; chan < ve + comp; chan++) {
+ // if (indir_index) {
+ // unsigned count =
+ // glsl_count_attribute_slots(var->type, ctx->stage == MESA_SHADER_VERTEX);
+ // count -= chan / 4;
+ // LLVMValueRef tmp_vec = build_gather_values_extended(
+ // &ctx->lc, ctx->abi->inputs + idx + chan, count, stride, false, true);
+
+ // values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
+ // } else
+ // values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
+ // }
+ break;
+ case nir_var_function_temp:
+ for (unsigned chan = 0; chan < ve; chan++) {
+ if (indir_index) {
+ /* NOTE(review): var may be NULL if the deref chain has no
+ * variable; glsl_count_attribute_slots(var->type, ...) would
+ * then crash — confirm this cannot happen for indirect
+ * function-temp derefs. */
+ unsigned count = glsl_count_attribute_slots(var->type, false);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = build_gather_values_extended(
+ &ctx->lc, ctx->locals + idx + chan, count, stride, true, true);
+
+ values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
+ } else {
+ values[chan] =
+ LLVMBuildLoad(ctx->lc.builder, ctx->locals[idx + chan + const_index * stride], "");
+ }
+ }
+ break;
+ case nir_var_shader_out:
+ /* TODO: remove this after RADV switches to lowered IO */
+ // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ // return load_tess_varyings(ctx, instr, false);
+ // }
+
+ // if (ctx->stage == MESA_SHADER_FRAGMENT && var->data.fb_fetch_output && ctx->abi->emit_fbfetch)
+ // return ctx->abi->emit_fbfetch(ctx->abi);
+
+ // for (unsigned chan = comp; chan < ve + comp; chan++) {
+ // if (indir_index) {
+ // unsigned count = glsl_count_attribute_slots(var->type, false);
+ // count -= chan / 4;
+ // LLVMValueRef tmp_vec = build_gather_values_extended(
+ // &ctx->lc, ctx->abi->outputs + idx + chan, count, stride, true, true);
+
+ // values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
+ // } else {
+ // values[chan] = LLVMBuildLoad(ctx->lc.builder,
+ // ctx->abi->outputs[idx + chan + const_index * stride], "");
+ // }
+ // }
+ break;
+ case nir_var_mem_global: {
+ LLVMValueRef address = get_src(ctx, instr->src[0]);
+ LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
+ unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
+ unsigned natural_stride = type_scalar_size_bytes(deref->type);
+ /* Shadows the outer 'stride' intentionally: this path is byte-based. */
+ unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ bool split_loads = false;
+
+ if (stride != natural_stride || split_loads) {
+ /* Strided (or forcibly split) case: load each component through a
+ * scalar pointer at offset i * stride / natural_stride. */
+ if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
+ result_type = LLVMGetElementType(result_type);
+
+ LLVMTypeRef ptr_type =
+ LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
+
+ for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
+ LLVMValueRef offset = LLVMConstInt(ctx->lc.i32, i * stride / natural_stride, 0);
+ values[i] =
+ LLVMBuildLoad(ctx->lc.builder, build_gep_ptr(&ctx->lc, address, offset), "");
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic);
+ }
+ return build_gather_values(&ctx->lc, values, instr->dest.ssa.num_components);
+ } else {
+ /* Natural layout: a single (possibly vector) load suffices. */
+ LLVMTypeRef ptr_type =
+ LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
+ LLVMValueRef val = LLVMBuildLoad(ctx->lc.builder, address, "");
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
+ return val;
+ }
+ }
+ default:
+ unreachable("unhandle variable mode");
+ }
+ ret = build_varying_gather_values(&ctx->lc, values, ve, comp);
+ return LLVMBuildBitCast(ctx->lc.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
+}
+
+/* Store to a NIR variable dereference.
+ *
+ * Only the nir_var_mem_global path currently emits stores; the
+ * shader-output and function-temp paths still extract the per-channel
+ * values but the actual stores are TODO (commented out pending the ABI /
+ * locals plumbing).  COHERENT/VOLATILE accesses are mapped to monotonic
+ * ordering on the emitted stores.
+ */
+static void visit_store_var(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
+{
+ // if (ctx->lc.postponed_kill) {
+ // LLVMValueRef cond = LLVMBuildLoad(ctx->lc.builder, ctx->lc.postponed_kill, "");
+ // ac_build_ifcc(&ctx->lc, cond, 7002);
+ // }
+
+ nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ /* NOTE(review): temp_ptr is only referenced by the commented-out TODO
+ * stores below, so it is currently an unused variable. */
+ LLVMValueRef temp_ptr, value;
+ int idx = 0;
+ unsigned comp = 0;
+ LLVMValueRef src = to_float(&ctx->lc, get_src(ctx, instr->src[1]));
+ int writemask = instr->const_index[0];
+ LLVMValueRef indir_index;
+ unsigned const_index;
+
+ if (var) {
+ get_deref_offset(ctx, deref, false, NULL, NULL, &const_index, &indir_index);
+ idx = var->data.driver_location;
+ comp = var->data.location_frac;
+
+ if (var->data.compact) {
+ const_index += comp;
+ comp = 0;
+ }
+ }
+
+ /* 64-bit sources are stored as pairs of f32 components, so double the
+ * writemask bits to cover both halves. */
+ if (get_elem_bits(&ctx->lc, LLVMTypeOf(src)) == 64 &&
+ (deref->mode == nir_var_shader_out || deref->mode == nir_var_function_temp)) {
+
+ src = LLVMBuildBitCast(ctx->lc.builder, src,
+ LLVMVectorType(ctx->lc.f32, get_llvm_num_components(src) * 2), "");
+
+ writemask = widen_mask(writemask, 2);
+ }
+
+ writemask = writemask << comp;
+
+ switch (deref->mode) {
+ case nir_var_shader_out:
+ /* TODO: remove this after RADV switches to lowered IO */
+ // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ // LLVMValueRef vertex_index = NULL;
+ // LLVMValueRef indir_index = NULL;
+ // unsigned const_index = 0;
+ // const bool is_patch = var->data.patch ||
+ // var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
+ // var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
+
+ // get_deref_offset(ctx, deref, false, NULL, is_patch ? NULL : &vertex_index, &const_index,
+ // &indir_index);
+
+ // ctx->abi->store_tcs_outputs(ctx->abi, var, vertex_index, indir_index, const_index, src,
+ // writemask, var->data.location_frac, var->data.driver_location);
+ // break;
+ // }
+
+ for (unsigned chan = 0; chan < 8; chan++) {
+ int stride = 4;
+ if (!(writemask & (1 << chan)))
+ continue;
+
+ /* Value is extracted but not yet stored anywhere (TODO below). */
+ value = llvm_extract_elem(&ctx->lc, src, chan - comp);
+
+ // if (var->data.compact)
+ // stride = 1;
+ // if (indir_index) {
+ // unsigned count = glsl_count_attribute_slots(var->type, false);
+ // count -= chan / 4;
+ // LLVMValueRef tmp_vec = build_gather_values_extended(
+ // &ctx->lc, ctx->abi->outputs + idx + chan, count, stride, true, true);
+
+ // tmp_vec = LLVMBuildInsertElement(ctx->lc.builder, tmp_vec, value, indir_index, "");
+ // build_store_values_extended(&ctx->lc, ctx->abi->outputs + idx + chan, count, stride,
+ // tmp_vec);
+
+ // } else {
+ // temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride];
+
+ // LLVMBuildStore(ctx->lc.builder, value, temp_ptr);
+ // }
+ }
+ break;
+ case nir_var_function_temp:
+ for (unsigned chan = 0; chan < 8; chan++) {
+ if (!(writemask & (1 << chan)))
+ continue;
+
+ /* Value is extracted but not yet stored anywhere (TODO below). */
+ value = llvm_extract_elem(&ctx->lc, src, chan);
+ // if (indir_index) {
+ // unsigned count = glsl_count_attribute_slots(var->type, false);
+ // count -= chan / 4;
+ // LLVMValueRef tmp_vec = build_gather_values_extended(
+ // &ctx->lc, ctx->locals + idx + chan, count, 4, true, true);
+
+ // tmp_vec = LLVMBuildInsertElement(ctx->lc.builder, tmp_vec, value, indir_index, "");
+ // build_store_values_extended(&ctx->lc, ctx->locals + idx + chan, count, 4, tmp_vec);
+ // } else {
+ // temp_ptr = ctx->locals[idx + chan + const_index * 4];
+
+ // LLVMBuildStore(ctx->lc.builder, value, temp_ptr);
+ // }
+ }
+ break;
+
+ case nir_var_mem_global: {
+ /* Re-read the raw writemask: the outer one may have been widened and
+ * shifted for the 64-bit shader-out/temp paths, but this path stores
+ * the original (un-bitcast) value from the source. */
+ int writemask = instr->const_index[0];
+ LLVMValueRef address = get_src(ctx, instr->src[0]);
+ LLVMValueRef val = get_src(ctx, instr->src[1]);
+
+ unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
+ unsigned natural_stride = type_scalar_size_bytes(deref->type);
+ unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ /* NOTE(review): elem_size_bytes is currently unused. */
+ int elem_size_bytes = get_elem_bits(&ctx->lc, LLVMTypeOf(val)) / 8;
+ bool split_stores = false;
+
+ LLVMTypeRef ptr_type =
+ LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
+
+ if (writemask == (1u << get_llvm_num_components(val)) - 1 && stride == natural_stride &&
+ !split_stores) {
+ /* Full mask with natural layout: one (possibly vector) store. */
+ LLVMTypeRef ptr_type =
+ LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
+
+ val = LLVMBuildBitCast(ctx->lc.builder, val, LLVMGetElementType(LLVMTypeOf(address)), "");
+ LLVMValueRef store = LLVMBuildStore(ctx->lc.builder, val, address);
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
+ } else {
+ /* Partial mask or non-natural stride: store each enabled channel
+ * individually through a scalar pointer. */
+ LLVMTypeRef val_type = LLVMTypeOf(val);
+ if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
+ val_type = LLVMGetElementType(val_type);
+
+ LLVMTypeRef ptr_type =
+ LLVMPointerType(val_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
+ for (unsigned chan = 0; chan < 4; chan++) {
+ if (!(writemask & (1 << chan)))
+ continue;
+
+ LLVMValueRef offset = LLVMConstInt(ctx->lc.i32, chan * stride / natural_stride, 0);
+
+ LLVMValueRef ptr = build_gep_ptr(&ctx->lc, address, offset);
+ LLVMValueRef src = llvm_extract_elem(&ctx->lc, val, chan);
+ src = LLVMBuildBitCast(ctx->lc.builder, src, LLVMGetElementType(LLVMTypeOf(ptr)), "");
+ LLVMValueRef store = LLVMBuildStore(ctx->lc.builder, src, ptr);
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
+ }
+ }
+ break;
+ }
+ default:
+ abort();
+ break;
+ }
+
+ // if (ctx->ac.postponed_kill)
+ // ac_build_endif(&ctx->ac, 7002);
+}
+
+static void visit_intrinsic(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
+{
+ LLVMValueRef result = NULL;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_ballot:
+ // result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
+ // if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
+ // result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
+ break;
+ case nir_intrinsic_read_invocation:
+ // result =
+ // ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
+ break;
+ case nir_intrinsic_read_first_invocation:
+ // result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
+ break;
+ case nir_intrinsic_load_subgroup_invocation:
+ // result = ac_get_thread_id(&ctx->ac);
+ break;
+ case nir_intrinsic_load_work_group_id: {
+ // LLVMValueRef values[3];
+
+ // for (int i = 0; i < 3; i++) {
+ // values[i] = ctx->args->workgroup_ids[i].used
+ // ? ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i])
+ // : ctx->ac.i32_0;
+ // }
+
+ // result = ac_build_gather_values(&ctx->ac, values, 3);
+ break;
+ }
+ case nir_intrinsic_load_base_vertex:
+ case nir_intrinsic_load_first_vertex:
+ //result = ctx->abi->load_base_vertex(ctx->abi);
+ result = LLVMGetParam(ctx->main_function, ctx->args.base_vertex.arg_index);
+ break;
+ case nir_intrinsic_load_local_group_size:
+ // result = ctx->abi->load_local_group_size(ctx->abi);
+ break;
+ case nir_intrinsic_load_vertex_id:
+ result = LLVMBuildAdd(ctx->lc.builder, LLVMGetParam(ctx->main_function, ctx->args.vertex_id.arg_index),
+ LLVMGetParam(ctx->main_function, ctx->args.base_vertex.arg_index), "");
+ break;
+ case nir_intrinsic_load_vertex_id_zero_base: {
+ // result = ctx->abi->vertex_id;
+ result = LLVMGetParam(ctx->main_function, ctx->args.vertex_id.arg_index);
+ break;
+ }
+ case nir_intrinsic_load_local_invocation_id: {
+ // result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
+ break;
+ }
+ case nir_intrinsic_load_base_instance:
+ // result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
+ break;
+ case nir_intrinsic_load_draw_id:
+ // result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
+ break;
+ case nir_intrinsic_load_view_index:
+ // result = ac_get_arg(&ctx->ac, ctx->args->view_index);
+ break;
+ case nir_intrinsic_load_invocation_id:
+ // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ // result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5);
+ // } else {
+ // if (ctx->ac.chip_class >= GFX10) {
+ // result =
+ // LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
+ // LLVMConstInt(ctx->ac.i32, 127, 0), "");
+ // } else {
+ // result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
+ // }
+ // }
+ break;
+ case nir_intrinsic_load_primitive_id:
+ // if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ // result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
+ // } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ // result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
+ // } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+ // result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
+ // } else
+ // fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
+ // break;
+ // case nir_intrinsic_load_sample_id:
+ // result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4);
+ break;
+ case nir_intrinsic_load_sample_pos:
+ // result = load_sample_pos(ctx);
+ break;
+ case nir_intrinsic_load_sample_mask_in:
+ // result = ctx->abi->load_sample_mask_in(ctx->abi);
+ break;
+ case nir_intrinsic_load_frag_coord: {
+ // LLVMValueRef values[4] = {
+ // ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
+ // ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
+ // ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
+ // result = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
+ break;
+ }
+ case nir_intrinsic_load_layer_id:
+ // result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ break;
+ case nir_intrinsic_load_front_face:
+ // result = ac_get_arg(&ctx->ac, ctx->args->front_face);
+ break;
+ case nir_intrinsic_load_helper_invocation:
+ // result = ac_build_load_helper_invocation(&ctx->ac);
+ break;
+ case nir_intrinsic_is_helper_invocation:
+ // result = ac_build_is_helper_invocation(&ctx->ac);
+ break;
+ case nir_intrinsic_load_color0:
+ // result = ctx->abi->color0;
+ break;
+ case nir_intrinsic_load_color1:
+ // result = ctx->abi->color1;
+ break;
+ case nir_intrinsic_load_user_data_amd:
+ // assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
+ // result = ctx->abi->user_data;
+ break;
+ case nir_intrinsic_load_instance_id:
+ // result = ctx->abi->instance_id;
+ break;
+ case nir_intrinsic_load_num_work_groups:
+ // result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
+ break;
+ case nir_intrinsic_load_local_invocation_index:
+ // result = visit_load_local_invocation_index(ctx);
+ break;
+ case nir_intrinsic_load_subgroup_id:
+ // result = visit_load_subgroup_id(ctx);
+ break;
+ case nir_intrinsic_load_num_subgroups:
+ // result = visit_load_num_subgroups(ctx);
+ break;
+ case nir_intrinsic_first_invocation:
+ // result = visit_first_invocation(ctx);
+ break;
+ case nir_intrinsic_load_push_constant:
+ // result = visit_load_push_constant(ctx, instr);
+ break;
+ case nir_intrinsic_vulkan_resource_index: {
+ // LLVMValueRef index = get_src(ctx, instr->src[0]);
+ // unsigned desc_set = nir_intrinsic_desc_set(instr);
+ // unsigned binding = nir_intrinsic_binding(instr);
+
+ // result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding);
+ break;
+ }
+ case nir_intrinsic_vulkan_resource_reindex:
+ // result = visit_vulkan_resource_reindex(ctx, instr);
+ break;
+ case nir_intrinsic_store_ssbo:
+ // visit_store_ssbo(ctx, instr);
+ break;
+ case nir_intrinsic_load_ssbo:
+ // result = visit_load_buffer(ctx, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ // result = visit_atomic_ssbo(ctx, instr);
+ break;
+ case nir_intrinsic_load_ubo:
+ // result = visit_load_ubo_buffer(ctx, instr);
+ break;
+ case nir_intrinsic_get_buffer_size:
+ // result = visit_get_buffer_size(ctx, instr);
+ break;
+ case nir_intrinsic_load_deref:
+ result = visit_load_var(ctx, instr);
+ break;
+ case nir_intrinsic_store_deref:
+ visit_store_var(ctx, instr);
+ break;
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_input_vertex:
+ case nir_intrinsic_load_per_vertex_input:
+ // result = visit_load(ctx, instr, false);
+ break;
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_per_vertex_output:
+ // result = visit_load(ctx, instr, true);
+ break;
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_per_vertex_output:
+ // visit_store_output(ctx, instr);
+ break;
+ case nir_intrinsic_load_shared:
+ // result = visit_load_shared(ctx, instr);
+ break;
+ case nir_intrinsic_store_shared:
+ // visit_store_shared(ctx, instr);
+ break;
+ case nir_intrinsic_bindless_image_samples:
+ case nir_intrinsic_image_deref_samples:
+ // result = visit_image_samples(ctx, instr);
+ break;
+ case nir_intrinsic_bindless_image_load:
+ // result = visit_image_load(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_load:
+ // result = visit_image_load(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_store:
+ // visit_image_store(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_store:
+ // visit_image_store(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_atomic_add:
+ case nir_intrinsic_bindless_image_atomic_imin:
+ case nir_intrinsic_bindless_image_atomic_umin:
+ case nir_intrinsic_bindless_image_atomic_imax:
+ case nir_intrinsic_bindless_image_atomic_umax:
+ case nir_intrinsic_bindless_image_atomic_and:
+ case nir_intrinsic_bindless_image_atomic_or:
+ case nir_intrinsic_bindless_image_atomic_xor:
+ case nir_intrinsic_bindless_image_atomic_exchange:
+ case nir_intrinsic_bindless_image_atomic_comp_swap:
+ case nir_intrinsic_bindless_image_atomic_inc_wrap:
+ case nir_intrinsic_bindless_image_atomic_dec_wrap:
+ // result = visit_image_atomic(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_imin:
+ case nir_intrinsic_image_deref_atomic_umin:
+ case nir_intrinsic_image_deref_atomic_imax:
+ case nir_intrinsic_image_deref_atomic_umax:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_atomic_inc_wrap:
+ case nir_intrinsic_image_deref_atomic_dec_wrap:
+ // result = visit_image_atomic(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_size:
+ // result = visit_image_size(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_size:
+ // result = visit_image_size(ctx, instr, false);
+ break;
+ case nir_intrinsic_shader_clock:
+ // result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr));
+ break;
+ case nir_intrinsic_discard:
+ case nir_intrinsic_discard_if:
+ // emit_discard(ctx, instr);
+ break;
+ case nir_intrinsic_demote:
+ case nir_intrinsic_demote_if:
+ // emit_demote(ctx, instr);
+ break;
+ case nir_intrinsic_memory_barrier:
+ case nir_intrinsic_group_memory_barrier:
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier_image:
+ case nir_intrinsic_memory_barrier_shared:
+ // emit_membar(&ctx->ac, instr);
+ break;
+ case nir_intrinsic_scoped_barrier: {
+ // assert(!(nir_intrinsic_memory_semantics(instr) &
+ // (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
+
+ // nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
+
+ // unsigned wait_flags = 0;
+ // if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
+ // wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
+ // if (modes & nir_var_mem_shared)
+ // wait_flags |= AC_WAIT_LGKM;
+
+ // if (wait_flags)
+ // ac_build_waitcnt(&ctx->ac, wait_flags);
+
+ // if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
+ // ac_emit_barrier(&ctx->ac, ctx->stage);
+ break;
+ }
+ case nir_intrinsic_memory_barrier_tcs_patch:
+ break;
+ case nir_intrinsic_control_barrier:
+ // ac_emit_barrier(&ctx->ac, ctx->stage);
+ break;
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap:
+ case nir_intrinsic_shared_atomic_fadd: {
+ // LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size);
+ // result = visit_var_atomic(ctx, instr, ptr, 1);
+ break;
+ }
+ case nir_intrinsic_deref_atomic_add:
+ case nir_intrinsic_deref_atomic_imin:
+ case nir_intrinsic_deref_atomic_umin:
+ case nir_intrinsic_deref_atomic_imax:
+ case nir_intrinsic_deref_atomic_umax:
+ case nir_intrinsic_deref_atomic_and:
+ case nir_intrinsic_deref_atomic_or:
+ case nir_intrinsic_deref_atomic_xor:
+ case nir_intrinsic_deref_atomic_exchange:
+ case nir_intrinsic_deref_atomic_comp_swap:
+ case nir_intrinsic_deref_atomic_fadd: {
+ // LLVMValueRef ptr = get_src(ctx, instr->src[0]);
+ // result = visit_var_atomic(ctx, instr, ptr, 1);
+ break;
+ }
+ case nir_intrinsic_load_barycentric_pixel:
+ // result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_centroid:
+ // result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_sample:
+ // result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_model:
+ // result = barycentric_model(ctx);
+ break;
+ case nir_intrinsic_load_barycentric_at_offset: {
+ // LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
+ // result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
+ break;
+ }
+ case nir_intrinsic_load_barycentric_at_sample: {
+ // LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
+ // result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
+ break;
+ }
+ case nir_intrinsic_load_interpolated_input: {
+ /* We assume any indirect loads have been lowered away */
+ // ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
+ // assert(offset);
+ // assert(offset[0].i32 == 0);
+
+ // LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
+ // unsigned index = nir_intrinsic_base(instr);
+ // unsigned component = nir_intrinsic_component(instr);
+ // result = load_interpolated_input(ctx, interp_param, index, component,
+ // instr->dest.ssa.num_components, instr->dest.ssa.bit_size);
+ break;
+ }
+ case nir_intrinsic_emit_vertex:
+ // ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
+ break;
+ case nir_intrinsic_emit_vertex_with_counter: {
+ // unsigned stream = nir_intrinsic_stream_id(instr);
+ // LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
+ // ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);
+ break;
+ }
+ case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter:
+ // ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
+ break;
+ case nir_intrinsic_load_tess_coord:
+ // result = ctx->abi->load_tess_coord(ctx->abi);
+ break;
+ case nir_intrinsic_load_tess_level_outer:
+ // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
+ break;
+ case nir_intrinsic_load_tess_level_inner:
+ // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
+ break;
+ case nir_intrinsic_load_tess_level_outer_default:
+ // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
+ break;
+ case nir_intrinsic_load_tess_level_inner_default:
+ // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
+ break;
+ case nir_intrinsic_load_patch_vertices_in:
+ // result = ctx->abi->load_patch_vertices_in(ctx->abi);
+ break;
+ case nir_intrinsic_vote_all: {
+ // LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
+ // result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
+ break;
+ }
+ case nir_intrinsic_vote_any: {
+ // LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
+ // result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
+ break;
+ }
+ case nir_intrinsic_shuffle:
+ // if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ||
+ // (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
+ // result =
+ // ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
+ // } else {
+ // LLVMValueRef src = get_src(ctx, instr->src[0]);
+ // LLVMValueRef index = get_src(ctx, instr->src[1]);
+ // LLVMTypeRef type = LLVMTypeOf(src);
+ // struct waterfall_context wctx;
+ // LLVMValueRef index_val;
+
+ // index_val = enter_waterfall(ctx, &wctx, index, true);
+
+ // src = LLVMBuildZExt(ctx->ac.builder, src, ctx->ac.i32, "");
+
+ // result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32,
+ // (LLVMValueRef[]){src, index_val}, 2,
+ // AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+ // result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
+
+ // result = exit_waterfall(ctx, &wctx, result);
+ // }
+ break;
+ case nir_intrinsic_reduce:
+ // result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0],
+ // instr->const_index[1]);
+ break;
+ case nir_intrinsic_inclusive_scan:
+ // result =
+ // ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
+ break;
+ case nir_intrinsic_exclusive_scan:
+ // result =
+ // ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
+ break;
+ case nir_intrinsic_quad_broadcast: {
+ // unsigned lane = nir_src_as_uint(instr->src[1]);
+ // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
+ break;
+ }
+ case nir_intrinsic_quad_swap_horizontal:
+ // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
+ break;
+ case nir_intrinsic_quad_swap_vertical:
+ // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
+ break;
+ case nir_intrinsic_quad_swap_diagonal:
+ // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
+ break;
+ case nir_intrinsic_quad_swizzle_amd: {
+ // uint32_t mask = nir_intrinsic_swizzle_mask(instr);
+ // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
+ // (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
+ break;
+ }
+ case nir_intrinsic_masked_swizzle_amd: {
+ // uint32_t mask = nir_intrinsic_swizzle_mask(instr);
+ // result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
+ break;
+ }
+ case nir_intrinsic_write_invocation_amd:
+ // result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
+ // get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2]));
+ break;
+ case nir_intrinsic_mbcnt_amd:
+ // result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
+ break;
+ case nir_intrinsic_load_scratch: {
+ // LLVMValueRef offset = get_src(ctx, instr->src[0]);
+ // LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
+ // LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ // LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
+ // ? comp_type
+ // : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
+ // unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ // ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
+ // result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+ break;
+ }
+ case nir_intrinsic_store_scratch: {
+ // LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ // LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
+ // LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
+ // unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ // ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(comp_type, addr_space), "");
+ // LLVMValueRef src = get_src(ctx, instr->src[0]);
+ // unsigned wrmask = nir_intrinsic_write_mask(instr);
+ // while (wrmask) {
+ // int start, count;
+ // u_bit_scan_consecutive_range(&wrmask, &start, &count);
+
+ // LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
+ // LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
+ // LLVMTypeRef vec_type = count == 1 ? comp_type : LLVMVectorType(comp_type, count);
+ // offset_ptr = LLVMBuildBitCast(ctx->ac.builder, offset_ptr,
+ // LLVMPointerType(vec_type, addr_space), "");
+ // LLVMValueRef offset_src = ac_extract_components(&ctx->ac, src, start, count);
+ // LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
+ // }
+ break;
+ }
+ case nir_intrinsic_load_constant: {
+ // unsigned base = nir_intrinsic_base(instr);
+ // unsigned range = nir_intrinsic_range(instr);
+
+ // LLVMValueRef offset = get_src(ctx, instr->src[0]);
+ // offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, base, false), "");
+
+ // /* Clamp the offset to avoid out-of-bound access because global
+ // * instructions can't handle them.
+ // */
+ // LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false);
+ // LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
+ // offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, "");
+
+ // LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data, offset);
+ // LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ // LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
+ // ? comp_type
+ // : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
+ // unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ // ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
+ // result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+ break;
+ }
+ default:
+ fprintf(stderr, "Unknown intrinsic: ");
+ nir_print_instr(&instr->instr, stderr);
+ fprintf(stderr, "\n");
+ break;
+ }
+ if (result) {
+ ctx->ssa_defs[instr->dest.ssa.index] = result;
+ }
+}
+
+static void visit_cf_list(struct libresoc_nir_tran_ctx *ctx, struct exec_list *list);
+
+/* Translate one NIR basic block: walk each instruction in order and
+ * dispatch to the per-type visitor. Texture instructions are not yet
+ * implemented (visit_tex is commented out) and are silently skipped;
+ * any unknown instruction type is fatal. */
+static void visit_block(struct libresoc_nir_tran_ctx *ctx, nir_block *block)
+{
+ nir_foreach_instr (instr, block) {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ visit_alu(ctx, nir_instr_as_alu(instr));
+ break;
+ case nir_instr_type_load_const:
+ visit_load_const(ctx, nir_instr_as_load_const(instr));
+ break;
+ case nir_instr_type_intrinsic:
+ visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
+ break;
+ case nir_instr_type_tex:
+ // visit_tex(ctx, nir_instr_as_tex(instr));
+ break;
+ case nir_instr_type_phi:
+ visit_phi(ctx, nir_instr_as_phi(instr));
+ break;
+ case nir_instr_type_ssa_undef:
+ visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
+ break;
+ case nir_instr_type_jump:
+ /* jumps only need the plain LLVM context, not the NIR ctx */
+ visit_jump(&ctx->lc, nir_instr_as_jump(instr));
+ break;
+ case nir_instr_type_deref:
+ visit_deref(ctx, nir_instr_as_deref(instr));
+ break;
+ default:
+ fprintf(stderr, "Unknown NIR instr type: ");
+ nir_print_instr(instr, stderr);
+ fprintf(stderr, "\n");
+ abort();
+ }
+ }
+}
+
+/* TODO(review): stub — NIR if-statements are not translated yet, so
+ * their then/else bodies are silently dropped by visit_cf_list. */
+static void visit_if(struct libresoc_nir_tran_ctx *ctx, nir_if *if_stmt)
+{
+
+}
+
+/* TODO(review): stub — NIR loops are not translated yet, so loop
+ * bodies are silently dropped by visit_cf_list. */
+static void visit_loop(struct libresoc_nir_tran_ctx *ctx, nir_loop *loop)
+{
+
+}
+
+/* Translate a NIR control-flow list (a function body or the body of an
+ * if/loop node). Blocks are lowered instruction by instruction; if and
+ * loop nodes are dispatched to their visitors (currently stubs). */
+static void visit_cf_list(struct libresoc_nir_tran_ctx *ctx, struct exec_list *list)
+{
+ foreach_list_typed(nir_cf_node, node, node, list)
+ {
+ switch (node->type) {
+ case nir_cf_node_block:
+ visit_block(ctx, nir_cf_node_as_block(node));
+ break;
+
+ case nir_cf_node_if:
+ visit_if(ctx, nir_cf_node_as_if(node));
+ break;
+
+ case nir_cf_node_loop:
+ visit_loop(ctx, nir_cf_node_as_loop(node));
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+}
+
+/* Translate a NIR shader into a freshly created LLVM module and return
+ * it; the caller owns the module. Per-translation bookkeeping (SSA def
+ * table, hash tables) allocated here is released before returning. */
+LLVMModuleRef libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_shader *nir)
+{
+ struct libresoc_nir_tran_ctx ctx = {};
+ struct nir_function *func;
+ char shader_name[60];
+ /* snprintf: never overflow shader_name, whatever the stage name is */
+ snprintf(shader_name, sizeof(shader_name), "libresoc-shader-%s", gl_shader_stage_name(nir->info.stage));
+ LLVMModuleRef mod = LLVMModuleCreateWithNameInContext(shader_name, llvm_ref->lc.context);
+ /* Copy the shared LLVM context FIRST: doing the struct copy after
+ * setting ctx.lc.module would clobber the module pointer. */
+ ctx.lc = llvm_ref->lc;
+ ctx.lc.module = &mod;
+ ctx.stage = nir->info.stage;
+ ctx.info = &nir->info;
+
+ if (ctx.stage == MESA_SHADER_VERTEX) {
+ add_arg(&ctx.args, ARG_SGPR, 1, ARG_INT, &ctx.args.base_vertex);
+ add_arg(&ctx.args, ARG_SGPR, 1, ARG_INT, &ctx.args.start_instance);
+ add_arg(&ctx.args, ARG_VGPR, 1, ARG_INT, &ctx.args.vertex_id);
+ }
+ LLVMTypeRef arg_types[32];
+ LLVMTypeRef ret_type = LLVMVoidTypeInContext(ctx.lc.context);
+ for (unsigned i = 0; i < ctx.args.arg_count; i++) {
+ arg_types[i] = arg_llvm_type(ctx.args.args[i].type, ctx.args.args[i].size, &ctx.lc);
+ }
+
+ /* main_function returns void; its parameters mirror ctx.args */
+ LLVMTypeRef main_function_type = LLVMFunctionType(ret_type, arg_types, ctx.args.arg_count, 0);
+
+ LLVMValueRef main_function = LLVMAddFunction(mod, "main_function", main_function_type);
+ LLVMBasicBlockRef main_function_body =
+ LLVMAppendBasicBlockInContext(ctx.lc.context, main_function, "main_body");
+ LLVMPositionBuilderAtEnd(ctx.lc.builder, main_function_body);
+ ctx.main_function = main_function;
+
+ ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ func = (struct nir_function *)exec_list_get_head(&nir->functions);
+
+ nir_index_ssa_defs(func->impl);
+ ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
+ setup_locals(&ctx, func);
+ setup_scratch(&ctx, nir);
+ setup_constant_data(&ctx, nir);
+
+ // if (gl_shader_stage_is_compute(nir->info.stage))
+ // setup_shared(&ctx, nir);
+ visit_cf_list(&ctx, &func->impl->body);
char *error = NULL;
- LLVMVerifyModule(mod, LLVMAbortProcessAction, &error);
+ /* NOTE(review): main_body may still lack a terminator (no return is
+ * emitted yet), so print verifier diagnostics instead of aborting. */
+ LLVMVerifyModule(mod, LLVMPrintMessageAction, &error);
LLVMDumpModule(mod);
LLVMDisposeMessage(error);
- LLVMOrcModuleHandle mod_handle;
- LLVMErrorRef error_ref = LLVMOrcAddEagerlyCompiledIR(llvm_ref->orc_ref,
- &mod_handle,
- mod,
- orc_sym_resolver,
- (void *)(llvm_ref->orc_ref));
+ /* Release bookkeeping allocated above; the module itself is returned. */
+ _mesa_hash_table_destroy(ctx.defs, NULL);
+ _mesa_hash_table_destroy(ctx.phis, NULL);
+ _mesa_hash_table_destroy(ctx.vars, NULL);
+ free(ctx.ssa_defs);
+ return mod;
+ // LLVMModuleRef mod = LLVMModuleCreateWithName("libresoc_mod");
+ // LLVMTypeRef param_types[] = { LLVMInt32Type(), LLVMInt32Type() };
+ // LLVMTypeRef ret_type = LLVMFunctionType(LLVMInt32Type(), param_types, 2, 0);
+ // LLVMValueRef sum = LLVMAddFunction(mod, "sum", ret_type);
+ // LLVMBasicBlockRef entry = LLVMAppendBasicBlock(sum, "entry");
+ // LLVMBuilderRef builder = LLVMCreateBuilder();
+ // LLVMPositionBuilderAtEnd(builder, entry);
+ // LLVMValueRef tmp = LLVMBuildAdd(builder, LLVMGetParam(sum, 0), LLVMGetParam(sum, 1), "tmp");
+ // LLVMBuildRet(builder, tmp);
+ // char *error = NULL;
+ // LLVMVerifyModule(mod, LLVMAbortProcessAction, &error);
+ // LLVMDumpModule(mod);
+ // LLVMDisposeMessage(error);
+ // LLVMOrcModuleHandle mod_handle;
+ // LLVMErrorRef error_ref = LLVMOrcAddEagerlyCompiledIR(llvm_ref->orc_ref,
+ // &mod_handle,
+ // mod,
+ // orc_sym_resolver,
+ // (void *)(llvm_ref->orc_ref));
}
--- /dev/null
+#include "libresoc_llvm_build.h"
+#include "util/macros.h"
+#include "util/bitscan.h"
+#include <alloca.h>
+#include <assert.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+
+/* Data for if/else/endif and bgnloop/endloop control flow structures.
+ * One frame per open construct, kept on lc->flow->stack. */
+struct llvm_flow {
+ /* Loop exit or next part of if/else/endif. */
+ LLVMBasicBlockRef next_block;
+ /* Set only for loop frames; branch target for "continue". */
+ LLVMBasicBlockRef loop_entry_block;
+};
+
+/* Intended to restore IEEE signed-zero semantics on the builder so the
+ * (x + 0) trick below is not optimized away. Currently a NO-OP: the
+ * C++ FastMathFlags code still needs porting to the LLVM C API. */
+void enable_signed_zeros(struct libresoc_llvm_context *ctx)
+{
+ //TODO: this is in C++, need to convert this into C
+ // if (ctx->float_mode == FLOAT_MODE_DEFAULT_OPENGL) {
+ // llvm::FastMathFlags flags = ctx->b->getFastMathFlags();
+
+ // /* This disables the optimization of (x + 0), which is used
+ // * to convert negative zero to positive zero.
+ // */
+ // flags.setNoSignedZeros(false);
+ // ctx->b->setFastMathFlags(flags);
+ // }
+}
+
+/* Counterpart of enable_signed_zeros(): re-enables the no-signed-zeros
+ * fast-math flag. Currently a NO-OP — C++ code awaiting a C-API port
+ * (note the commented snippet is also missing an opening brace). */
+void disable_signed_zeros(struct libresoc_llvm_context *ctx)
+{
+ //TODO: this is in C++, need to convert this into C
+ // if (ctx->float_mode == FLOAT_MODE_DEFAULT_OPENGL)
+ // llvm::FastMathFlags flags = ctx->b->getFastMathFlags();
+
+ // flags.setNoSignedZeros();
+ // ctx->b->setFastMathFlags(flags);
+ // }
+}
+
+/* Map a func_attr flag to the attribute name understood by
+ * LLVMGetEnumAttributeKindForName(). Returns NULL (after logging) for
+ * flags with no known mapping; callers must check for NULL. */
+static const char *attr_to_str(enum func_attr attr)
+{
+ switch (attr) {
+ case FUNC_ATTR_ALWAYSINLINE:
+ return "alwaysinline";
+ case FUNC_ATTR_INREG:
+ return "inreg";
+ case FUNC_ATTR_NOALIAS:
+ return "noalias";
+ case FUNC_ATTR_NOUNWIND:
+ return "nounwind";
+ case FUNC_ATTR_READNONE:
+ return "readnone";
+ case FUNC_ATTR_READONLY:
+ return "readonly";
+ case FUNC_ATTR_WRITEONLY:
+ return "writeonly";
+ case FUNC_ATTR_INACCESSIBLE_MEM_ONLY:
+ return "inaccessiblememonly";
+ case FUNC_ATTR_CONVERGENT:
+ return "convergent";
+ default:
+ fprintf(stderr, "Unhandled function attribute: %x\n", attr);
+ /* NULL, not 0: this is a pointer return */
+ return NULL;
+ }
+}
+
+/* Attach one enum attribute to a function (attr_idx = parameter index,
+ * -1 for the function itself) or, when @function is actually a call
+ * instruction, to that call site. */
+void add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx,
+ enum func_attr attr)
+{
+ const char *attr_name = attr_to_str(attr);
+ /* attr_to_str returns NULL for unhandled flags (already logged);
+ * calling strlen(NULL) would be undefined behavior, so bail out. */
+ if (!attr_name)
+ return;
+ unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name));
+ LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
+
+ if (LLVMIsAFunction(function))
+ LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+ else
+ LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
+}
+
+/* Apply every attribute set in @attrib_mask to @function (a function or
+ * a call site). nounwind is always forced on; the LEGACY marker is an
+ * internal flag, not a real attribute, and is stripped here. */
+void add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask)
+{
+ attrib_mask |= FUNC_ATTR_NOUNWIND;
+ attrib_mask &= ~FUNC_ATTR_LEGACY;
+
+ while (attrib_mask) {
+ enum func_attr attr = 1u << u_bit_scan(&attrib_mask);
+ /* index -1: attach to the function itself, not a parameter */
+ add_function_attr(ctx, function, -1, attr);
+ }
+}
+
+/* Scan the control-flow stack from the top and return the closest
+ * enclosing loop frame, or NULL when not inside any loop. */
+static struct llvm_flow *get_innermost_loop(struct libresoc_llvm_context *lc)
+{
+ for (unsigned i = lc->flow->depth; i > 0; --i) {
+ if (lc->flow->stack[i - 1].loop_entry_block)
+ return &lc->flow->stack[i - 1];
+ }
+ return NULL;
+}
+
+/* Turn -0.0 into +0.0 by emitting (val + 0) while signed zeros are
+ * honoured; the enable/disable pair is meant to restore the builder's
+ * fast-math flags afterwards (both are currently no-op stubs). */
+static LLVMValueRef eliminate_negative_zero(struct libresoc_llvm_context *ctx, LLVMValueRef val)
+{
+ enable_signed_zeros(ctx);
+ /* (val + 0) converts negative zero to positive zero. */
+ val = LLVMBuildFAdd(ctx->builder, val, LLVMConstNull(LLVMTypeOf(val)), "");
+ disable_signed_zeros(ctx);
+ return val;
+}
+
+/* Emit the branch for "break": jump to the exit block of the innermost
+ * loop. Must only be called while translating inside a loop. */
+void build_break(struct libresoc_llvm_context *lc)
+{
+ struct llvm_flow *flow = get_innermost_loop(lc);
+ /* get_innermost_loop() returns NULL outside any loop; that would be
+ * a translator bug — fail loudly instead of dereferencing NULL. */
+ assert(flow);
+ LLVMBuildBr(lc->builder, flow->next_block);
+}
+
+/* Emit the branch for "continue": jump back to the entry block of the
+ * innermost loop. Must only be called while translating inside a loop. */
+void build_continue(struct libresoc_llvm_context *lc)
+{
+ struct llvm_flow *flow = get_innermost_loop(lc);
+ /* NULL here means "continue" outside a loop — a translator bug;
+ * assert rather than dereference NULL. */
+ assert(flow);
+ LLVMBuildBr(lc->builder, flow->loop_entry_block);
+}
+
+/* Number of elements in @value: the vector width for vector-typed
+ * values, otherwise 1 for scalars. */
+int get_llvm_num_components(LLVMValueRef value)
+{
+ LLVMTypeRef type = LLVMTypeOf(value);
+ if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
+ return LLVMGetVectorSize(type);
+ return 1;
+}
+
+/* Extract element @index from @value. Scalars pass through unchanged,
+ * in which case only index 0 is legal. */
+LLVMValueRef llvm_extract_elem(struct libresoc_llvm_context *lc, LLVMValueRef value, int index)
+{
+ if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) {
+ assert(index == 0);
+ return value;
+ }
+
+ return LLVMBuildExtractElement(lc->builder, value, LLVMConstInt(lc->i32, index, false), "");
+}
+
+/* Bit width of @type (vectors are reduced to their element type first).
+ * LDS pointers count as 32 bits; other pointer address spaces fall
+ * through and, like any unhandled kind, hit unreachable(). */
+int get_elem_bits(struct libresoc_llvm_context *lc, LLVMTypeRef type)
+{
+ if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
+ type = LLVMGetElementType(type);
+
+ if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
+ return LLVMGetIntTypeWidth(type);
+
+ if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
+ /* only LDS pointers have a defined width here */
+ if (LLVMGetPointerAddressSpace(type) == ADDR_SPACE_LDS)
+ return 32;
+ }
+
+ if (type == lc->f16)
+ return 16;
+ if (type == lc->f32)
+ return 32;
+ if (type == lc->f64)
+ return 64;
+
+ unreachable("Unhandled type kind in get_elem_bits");
+}
+
+/**
+ * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
+ * intrinsic names).
+ */
+void build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
+{
+ LLVMTypeRef elem_type = type;
+
+ assert(bufsize >= 8);
+
+ if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
+ int ret = snprintf(buf, bufsize, "v%u", LLVMGetVectorSize(type));
+ /* Bail on error OR truncation: advancing buf by a truncated count
+ * would step past the end of the buffer, and the unsigned
+ * subtraction bufsize -= ret would underflow. */
+ if (ret < 0 || (unsigned)ret >= bufsize) {
+ char *type_name = LLVMPrintTypeToString(type);
+ fprintf(stderr, "Error building type name for: %s\n", type_name);
+ LLVMDisposeMessage(type_name);
+ return;
+ }
+ elem_type = LLVMGetElementType(type);
+ buf += ret;
+ bufsize -= ret;
+ }
+ switch (LLVMGetTypeKind(elem_type)) {
+ default:
+ break;
+ case LLVMIntegerTypeKind:
+ snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
+ break;
+ case LLVMHalfTypeKind:
+ snprintf(buf, bufsize, "f16");
+ break;
+ case LLVMFloatTypeKind:
+ snprintf(buf, bufsize, "f32");
+ break;
+ case LLVMDoubleTypeKind:
+ snprintf(buf, bufsize, "f64");
+ break;
+ }
+}
+
+/* Scalar type -> the integer type of the same width. i8 passes through
+ * unchanged (there is no 8-bit float); unknown widths abort. */
+static LLVMTypeRef to_integer_type_scalar(struct libresoc_llvm_context *lc, LLVMTypeRef t)
+{
+ if (t == lc->i8)
+ return lc->i8;
+ else if (t == lc->f16 || t == lc->i16)
+ return lc->i16;
+ else if (t == lc->f32 || t == lc->i32)
+ return lc->i32;
+ else if (t == lc->f64 || t == lc->i64)
+ return lc->i64;
+ else
+ unreachable("Unhandled integer size");
+}
+
+/* Type -> equivalent integer type. Vectors convert element-wise;
+ * pointers map to an integer of the pointer width for their address
+ * space (64-bit global, 32-bit const/LDS). */
+LLVMTypeRef to_integer_type(struct libresoc_llvm_context *lc, LLVMTypeRef t)
+{
+ if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+ LLVMTypeRef elem_type = LLVMGetElementType(t);
+ return LLVMVectorType(to_integer_type_scalar(lc, elem_type), LLVMGetVectorSize(t));
+ }
+ if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) {
+ switch (LLVMGetPointerAddressSpace(t)) {
+ case ADDR_SPACE_GLOBAL:
+ return lc->i64;
+ case ADDR_SPACE_CONST_32BIT:
+ case ADDR_SPACE_LDS:
+ return lc->i32;
+ default:
+ unreachable("unhandled address space");
+ }
+ }
+ return to_integer_type_scalar(lc, t);
+}
+
+/* Reinterpret @v as its integer counterpart: ptrtoint for pointers,
+ * bitcast for everything else (no value change, just the type). */
+LLVMValueRef to_integer(struct libresoc_llvm_context *lc, LLVMValueRef v)
+{
+ LLVMTypeRef type = LLVMTypeOf(v);
+ if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
+ return LLVMBuildPtrToInt(lc->builder, v, to_integer_type(lc, type), "");
+ }
+ return LLVMBuildBitCast(lc->builder, v, to_integer_type(lc, type), "");
+}
+
+/* Like to_integer(), but leaves pointer values untouched instead of
+ * converting them to integers. */
+LLVMValueRef to_integer_or_pointer(struct libresoc_llvm_context *lc, LLVMValueRef v)
+{
+ LLVMTypeRef type = LLVMTypeOf(v);
+ if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
+ return v;
+ return to_integer(lc, v);
+}
+
+/* Scalar type -> the float type of the same width. i8 passes through
+ * unchanged (there is no 8-bit float); unknown widths abort. */
+static LLVMTypeRef to_float_type_scalar(struct libresoc_llvm_context *lc, LLVMTypeRef t)
+{
+ if (t == lc->i8)
+ return lc->i8;
+ else if (t == lc->i16 || t == lc->f16)
+ return lc->f16;
+ else if (t == lc->i32 || t == lc->f32)
+ return lc->f32;
+ else if (t == lc->i64 || t == lc->f64)
+ return lc->f64;
+ else
+ unreachable("Unhandled float size");
+}
+
+/* Type -> equivalent float type; vectors convert element-wise. Unlike
+ * to_integer_type() there is no pointer handling here. */
+LLVMTypeRef to_float_type(struct libresoc_llvm_context *lc, LLVMTypeRef t)
+{
+ if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+ LLVMTypeRef elem_type = LLVMGetElementType(t);
+ return LLVMVectorType(to_float_type_scalar(lc, elem_type), LLVMGetVectorSize(t));
+ }
+ return to_float_type_scalar(lc, t);
+}
+
+/* Bitcast @v to its same-width float counterpart (no value change). */
+LLVMValueRef to_float(struct libresoc_llvm_context *lc, LLVMValueRef v)
+{
+ LLVMTypeRef type = LLVMTypeOf(v);
+ return LLVMBuildBitCast(lc->builder, v, to_float_type(lc, type), "");
+}
+
+/* Size of @type in bytes. Pointers are 8 bytes except those in the
+ * 32-bit const address space; vectors and arrays are computed
+ * recursively from their element size. Unhandled kinds assert. */
+unsigned get_type_size(LLVMTypeRef type)
+{
+ LLVMTypeKind kind = LLVMGetTypeKind(type);
+
+ switch (kind) {
+ case LLVMIntegerTypeKind:
+ return LLVMGetIntTypeWidth(type) / 8;
+ case LLVMHalfTypeKind:
+ return 2;
+ case LLVMFloatTypeKind:
+ return 4;
+ case LLVMDoubleTypeKind:
+ return 8;
+ case LLVMPointerTypeKind:
+ if (LLVMGetPointerAddressSpace(type) == ADDR_SPACE_CONST_32BIT)
+ return 4;
+ return 8;
+ case LLVMVectorTypeKind:
+ return LLVMGetVectorSize(type) * get_type_size(LLVMGetElementType(type));
+ case LLVMArrayTypeKind:
+ return LLVMGetArrayLength(type) * get_type_size(LLVMGetElementType(type));
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+/* Build a call to @name, lazily declaring the function in the module on
+ * first use with parameter types taken from @params (max 32). Unless
+ * FUNC_ATTR_LEGACY is set the attributes go on the call site; in legacy
+ * mode they are applied to the function at declaration time instead. */
+LLVMValueRef build_intrinsic(struct libresoc_llvm_context *lc, const char *name,
+ LLVMTypeRef return_type, LLVMValueRef *params, unsigned param_count,
+ unsigned attrib_mask)
+{
+ LLVMValueRef function, call;
+ bool set_callsite_attrs = !(attrib_mask & FUNC_ATTR_LEGACY);
+
+ function = LLVMGetNamedFunction(*(lc->module), name);
+ if (!function) {
+ LLVMTypeRef param_types[32], function_type;
+ unsigned i;
+
+ assert(param_count <= 32);
+
+ for (i = 0; i < param_count; ++i) {
+ assert(params[i]);
+ param_types[i] = LLVMTypeOf(params[i]);
+ }
+ function_type = LLVMFunctionType(return_type, param_types, param_count, 0);
+ function = LLVMAddFunction(*(lc->module), name, function_type);
+
+ LLVMSetFunctionCallConv(function, LLVMCCallConv);
+ LLVMSetLinkage(function, LLVMExternalLinkage);
+
+ if (!set_callsite_attrs)
+ add_func_attributes(lc->context, function, attrib_mask);
+ }
+
+ call = LLVMBuildCall(lc->builder, function, params, param_count, "");
+ if (set_callsite_attrs)
+ add_func_attributes(lc->context, call, attrib_mask);
+ return call;
+}
+
+LLVMValueRef build_canonicalize(struct libresoc_llvm_context *lc, LLVMValueRef src0, unsigned bitsize)
+{
+ LLVMTypeRef type;
+ char *intr;
+
+ if (bitsize == 16) {
+ intr = "llvm.canonicalize.f16";
+ type = lc->f16;
+ } else if (bitsize == 32) {
+ intr = "llvm.canonicalize.f32";
+ type = lc->f32;
+ } else {
+ intr = "llvm.canonicalize.f64";
+ type = lc->f64;
+ }
+
+ LLVMValueRef params[] = {
+ src0,
+ };
+ return build_intrinsic(lc, intr, type, params, 1, FUNC_ATTR_READNONE);
+}
+
+LLVMValueRef build_alloca_undef(struct libresoc_llvm_context *lc, LLVMTypeRef type, const char *name)
+{
+ LLVMBuilderRef builder = lc->builder;
+ LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
+ LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+ LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
+ LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
+ LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(lc->context);
+ LLVMValueRef res;
+
+ if (first_instr) {
+ LLVMPositionBuilderBefore(first_builder, first_instr);
+ } else {
+ LLVMPositionBuilderAtEnd(first_builder, first_block);
+ }
+
+ res = LLVMBuildAlloca(first_builder, type, name);
+ LLVMDisposeBuilder(first_builder);
+ return res;
+}
+
+LLVMValueRef build_gep_ptr(struct libresoc_llvm_context *lc, LLVMValueRef base_ptr,
+ LLVMValueRef index)
+{
+ return LLVMBuildGEP(lc->builder, base_ptr, &index, 1, "");
+}
+
+LLVMValueRef build_gep0(struct libresoc_llvm_context *lc, LLVMValueRef base_ptr, LLVMValueRef index)
+{
+ LLVMValueRef indices[2] = {
+ lc->i32_0,
+ index,
+ };
+ return LLVMBuildGEP(lc->builder, base_ptr, indices, 2, "");
+}
+
+void build_sendmsg(struct libresoc_llvm_context *lc, uint32_t msg, LLVMValueRef wave_id)
+{
+ LLVMValueRef args[2];
+ args[0] = LLVMConstInt(lc->i32, msg, false);
+ args[1] = wave_id;
+ build_intrinsic(lc, "llvm.amdgcn.s.sendmsg", lc->voidt, args, 2, 0);
+}
+
+LLVMValueRef build_imsb(struct libresoc_llvm_context *lc, LLVMValueRef arg, LLVMTypeRef dst_type)
+{
+ LLVMValueRef msb =
+ build_intrinsic(lc, "llvm.amdgcn.sffbh.i32", dst_type, &arg, 1, FUNC_ATTR_READNONE);
+
+ /* The HW returns the last bit index from MSB, but NIR/TGSI wants
+ * the index from LSB. Invert it by doing "31 - msb". */
+ msb = LLVMBuildSub(lc->builder, LLVMConstInt(lc->i32, 31, false), msb, "");
+
+ LLVMValueRef all_ones = LLVMConstInt(lc->i32, -1, true);
+ LLVMValueRef cond =
+ LLVMBuildOr(lc->builder, LLVMBuildICmp(lc->builder, LLVMIntEQ, arg, lc->i32_0, ""),
+ LLVMBuildICmp(lc->builder, LLVMIntEQ, arg, all_ones, ""), "");
+
+ return LLVMBuildSelect(lc->builder, cond, all_ones, msb, "");
+}
+
+LLVMValueRef build_umsb(struct libresoc_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type)
+{
+ const char *intrin_name;
+ LLVMTypeRef type;
+ LLVMValueRef highest_bit;
+ LLVMValueRef zero;
+ unsigned bitsize;
+
+ bitsize = get_elem_bits(ctx, LLVMTypeOf(arg));
+ switch (bitsize) {
+ case 64:
+ intrin_name = "llvm.ctlz.i64";
+ type = ctx->i64;
+ highest_bit = LLVMConstInt(ctx->i64, 63, false);
+ zero = ctx->i64_0;
+ break;
+ case 32:
+ intrin_name = "llvm.ctlz.i32";
+ type = ctx->i32;
+ highest_bit = LLVMConstInt(ctx->i32, 31, false);
+ zero = ctx->i32_0;
+ break;
+ case 16:
+ intrin_name = "llvm.ctlz.i16";
+ type = ctx->i16;
+ highest_bit = LLVMConstInt(ctx->i16, 15, false);
+ zero = ctx->i16_0;
+ break;
+ case 8:
+ intrin_name = "llvm.ctlz.i8";
+ type = ctx->i8;
+ highest_bit = LLVMConstInt(ctx->i8, 7, false);
+ zero = ctx->i8_0;
+ break;
+ default:
+ unreachable(!"invalid bitsize");
+ break;
+ }
+
+ LLVMValueRef params[2] = {
+ arg,
+ ctx->i1true,
+ };
+
+ LLVMValueRef msb = build_intrinsic(ctx, intrin_name, type, params, 2, FUNC_ATTR_READNONE);
+
+ /* The HW returns the last bit index from MSB, but TGSI/NIR wants
+ * the index from LSB. Invert it by doing "31 - msb". */
+ msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
+
+ if (bitsize == 64) {
+ msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, "");
+ } else if (bitsize < 32) {
+ msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, "");
+ }
+
+ /* check for zero */
+ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""),
+ LLVMConstInt(ctx->i32, -1, true), msb, "");
+}
+
+LLVMValueRef build_fmin(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
+{
+ char name[64], type[64];
+
+ build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type));
+ snprintf(name, sizeof(name), "llvm.minnum.%s", type);
+ LLVMValueRef args[2] = {a, b};
+ return build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, FUNC_ATTR_READNONE);
+}
+
+LLVMValueRef build_fmax(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
+{
+ char name[64], type[64];
+
+ build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type));
+ snprintf(name, sizeof(name), "llvm.maxnum.%s", type);
+ LLVMValueRef args[2] = {a, b};
+ return build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, FUNC_ATTR_READNONE);
+}
+
+LLVMValueRef build_imin(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
+{
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, "");
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+}
+
+LLVMValueRef build_imax(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
+{
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, "");
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+}
+
+LLVMValueRef build_umin(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
+{
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+}
+
+LLVMValueRef build_umax(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
+{
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, "");
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+}
+
+LLVMValueRef build_clamp(struct libresoc_llvm_context *ctx, LLVMValueRef value)
+{
+ LLVMTypeRef t = LLVMTypeOf(value);
+ return build_fmin(ctx, build_fmax(ctx, value, LLVMConstReal(t, 0.0)),
+ LLVMConstReal(t, 1.0));
+}
+
+LLVMValueRef build_gather_values_extended(struct libresoc_llvm_context *ctx, LLVMValueRef *values,
+ unsigned value_count, unsigned value_stride, bool load,
+ bool always_vector)
+{
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMValueRef vec = NULL;
+ unsigned i;
+
+ if (value_count == 1 && !always_vector) {
+ if (load)
+ return LLVMBuildLoad(builder, values[0], "");
+ return values[0];
+ } else if (!value_count)
+ unreachable("value_count is 0");
+
+ for (i = 0; i < value_count; i++) {
+ LLVMValueRef value = values[i * value_stride];
+ if (load)
+ value = LLVMBuildLoad(builder, value, "");
+
+ if (!i)
+ vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count));
+ LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
+ vec = LLVMBuildInsertElement(builder, vec, value, index, "");
+ }
+ return vec;
+}
+
+LLVMValueRef build_gather_values(struct libresoc_llvm_context *ctx, LLVMValueRef *values,
+ unsigned value_count)
+{
+ return build_gather_values_extended(ctx, values, value_count, 1, false, false);
+}
+
+LLVMValueRef build_varying_gather_values(struct libresoc_llvm_context *ctx, LLVMValueRef *values,
+ unsigned value_count, unsigned component)
+{
+ LLVMValueRef vec = NULL;
+
+ if (value_count == 1) {
+ return values[component];
+ } else if (!value_count)
+ unreachable("value_count is 0");
+
+ for (unsigned i = component; i < value_count + component; i++) {
+ LLVMValueRef value = values[i];
+
+ if (i == component)
+ vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count));
+ LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
+ vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
+ }
+ return vec;
+}
+
+LLVMValueRef build_fdiv(struct libresoc_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den)
+{
+ unsigned type_size = get_type_size(LLVMTypeOf(den));
+ const char *name;
+
+ /* For doubles, we need precise division to pass GLCTS. */
+ if (ctx->float_mode == FLOAT_MODE_DEFAULT_OPENGL && type_size == 8)
+ return LLVMBuildFDiv(ctx->builder, num, den, "");
+
+ if (type_size == 2)
+ name = "llvm.amdgcn.rcp.f16";
+ else if (type_size == 4)
+ name = "llvm.amdgcn.rcp.f32";
+ else
+ name = "llvm.amdgcn.rcp.f64";
+
+ LLVMValueRef rcp =
+ build_intrinsic(ctx, name, LLVMTypeOf(den), &den, 1, FUNC_ATTR_READNONE);
+
+ return LLVMBuildFMul(ctx->builder, num, rcp, "");
+}
+
+LLVMValueRef const_uint_vec(struct libresoc_llvm_context *ctx, LLVMTypeRef type, uint64_t value)
+{
+
+ if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
+ LLVMValueRef scalar = LLVMConstInt(LLVMGetElementType(type), value, 0);
+ unsigned vec_size = LLVMGetVectorSize(type);
+ LLVMValueRef *scalars = alloca(vec_size * sizeof(LLVMValueRef *));
+
+ for (unsigned i = 0; i < vec_size; i++)
+ scalars[i] = scalar;
+ return LLVMConstVector(scalars, vec_size);
+ }
+ return LLVMConstInt(type, value, 0);
+}
+
+LLVMValueRef build_isign(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
+{
+ LLVMTypeRef type = LLVMTypeOf(src0);
+ LLVMValueRef val;
+
+ /* v_med3 is selected only when max is first. (LLVM bug?) */
+ val = build_imax(ctx, src0, const_uint_vec(ctx, type, -1));
+ return build_imin(ctx, val, const_uint_vec(ctx, type, 1));
+}
+
/* Float sign(): +1.0 if src > 0, -1.0 if src < 0, otherwise 0.0.
 * 16/32-bit inputs are handled with the integer isign trick; 64-bit
 * inputs build the result directly from its two dwords.
 */
LLVMValueRef build_fsign(struct libresoc_llvm_context *ctx, LLVMValueRef src)
{
   LLVMTypeRef type = LLVMTypeOf(src);
   LLVMValueRef pos, neg, dw[2], val;
   unsigned bitsize = get_elem_bits(ctx, type);

   /* The standard version leads to this:
    * v_cmp_ngt_f32_e64 s[0:1], s4, 0 ; D40B0000 00010004
    * v_cndmask_b32_e64 v4, 1.0, s4, s[0:1] ; D5010004 000008F2
    * v_cmp_le_f32_e32 vcc, 0, v4 ; 7C060880
    * v_cndmask_b32_e32 v4, -1.0, v4, vcc ; 020808F3
    *
    * The isign version:
    * v_add_f32_e64 v4, s4, 0 ; D5030004 00010004
    * v_med3_i32 v4, v4, -1, 1 ; D5580004 02058304
    * v_cvt_f32_i32_e32 v4, v4 ; 7E080B04
    *
    * (src0 + 0) converts negative zero to positive zero.
    * After that, int(fsign(x)) == isign(floatBitsToInt(x)).
    *
    * For FP64, use the standard version, which doesn't suffer from the huge DP rate
    * reduction. (FP64 comparisons are as fast as int64 comparisons)
    */
   if (bitsize == 16 || bitsize == 32) {
      /* Kill -0.0 first so its integer bits classify as zero, then take
       * the integer sign and convert back to float. */
      val = to_integer(ctx, eliminate_negative_zero(ctx, src));
      val = build_isign(ctx, val);
      return LLVMBuildSIToFP(ctx->builder, val, type, "");
   }

   assert(bitsize == 64);
   /* Both ordered compares are false for NaN, so NaN yields 0.0 here. */
   pos = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src, ctx->f64_0, "");
   neg = LLVMBuildFCmp(ctx->builder, LLVMRealOLT, src, ctx->f64_0, "");
   /* Assemble the double from dwords: low dword is always 0;
    * 0x3FF00000 / 0xBFF00000 are the high dwords of +1.0 / -1.0. */
   dw[0] = ctx->i32_0;
   dw[1] = LLVMBuildSelect(
      ctx->builder, pos, LLVMConstInt(ctx->i32, 0x3FF00000, 0),
      LLVMBuildSelect(ctx->builder, neg, LLVMConstInt(ctx->i32, 0xBFF00000, 0), ctx->i32_0, ""),
      "");
   return LLVMBuildBitCast(ctx->builder, build_gather_values(ctx, dw, 2), ctx->f64, "");
}
+
+LLVMValueRef build_bitfield_reverse(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
+{
+ LLVMValueRef result;
+ unsigned bitsize;
+
+ bitsize = get_elem_bits(ctx, LLVMTypeOf(src0));
+
+ switch (bitsize) {
+ case 64:
+ result = build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64, (LLVMValueRef[]){src0}, 1,
+ FUNC_ATTR_READNONE);
+
+ result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
+ break;
+ case 32:
+ result = build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, (LLVMValueRef[]){src0}, 1,
+ FUNC_ATTR_READNONE);
+ break;
+ case 16:
+ result = build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16, (LLVMValueRef[]){src0}, 1,
+ FUNC_ATTR_READNONE);
+
+ result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
+ break;
+ case 8:
+ result = build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8, (LLVMValueRef[]){src0}, 1,
+ FUNC_ATTR_READNONE);
+
+ result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
+ break;
+ default:
+ unreachable(!"invalid bitsize");
+ break;
+ }
+
+ return result;
+}
+
+LLVMValueRef build_bit_count(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
+{
+ LLVMValueRef result;
+ unsigned bitsize;
+
+ bitsize = get_elem_bits(ctx, LLVMTypeOf(src0));
+
+ switch (bitsize) {
+ case 128:
+ result = build_intrinsic(ctx, "llvm.ctpop.i128", ctx->i128, (LLVMValueRef[]){src0}, 1,
+ FUNC_ATTR_READNONE);
+ result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
+ break;
+ case 64:
+ result = build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64, (LLVMValueRef[]){src0}, 1,
+ FUNC_ATTR_READNONE);
+
+ result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
+ break;
+ case 32:
+ result = build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, (LLVMValueRef[]){src0}, 1,
+ FUNC_ATTR_READNONE);
+ break;
+ case 16:
+ result = build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16, (LLVMValueRef[]){src0}, 1,
+ FUNC_ATTR_READNONE);
+
+ result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
+ break;
+ case 8:
+ result = build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8, (LLVMValueRef[]){src0}, 1,
+ FUNC_ATTR_READNONE);
+
+ result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
+ break;
+ default:
+ unreachable(!"invalid bitsize");
+ break;
+ }
+
+ return result;
+}
+
+LLVMValueRef build_bfe(struct libresoc_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset,
+ LLVMValueRef width, bool is_signed)
+{
+ LLVMValueRef args[] = {
+ input,
+ offset,
+ width,
+ };
+
+ return build_intrinsic(ctx, is_signed ? "llvm.amdgcn.sbfe.i32" : "llvm.amdgcn.ubfe.i32",
+ ctx->i32, args, 3, FUNC_ATTR_READNONE);
+}
+
+LLVMValueRef find_lsb(struct libresoc_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0)
+{
+ unsigned src0_bitsize = get_elem_bits(ctx, LLVMTypeOf(src0));
+ const char *intrin_name;
+ LLVMTypeRef type;
+ LLVMValueRef zero;
+
+ switch (src0_bitsize) {
+ case 64:
+ intrin_name = "llvm.cttz.i64";
+ type = ctx->i64;
+ zero = ctx->i64_0;
+ break;
+ case 32:
+ intrin_name = "llvm.cttz.i32";
+ type = ctx->i32;
+ zero = ctx->i32_0;
+ break;
+ case 16:
+ intrin_name = "llvm.cttz.i16";
+ type = ctx->i16;
+ zero = ctx->i16_0;
+ break;
+ case 8:
+ intrin_name = "llvm.cttz.i8";
+ type = ctx->i8;
+ zero = ctx->i8_0;
+ break;
+ default:
+ unreachable(!"invalid bitsize");
+ }
+
+ LLVMValueRef params[2] = {
+ src0,
+
+ /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
+ * add special code to check for x=0. The reason is that
+ * the LLVM behavior for x=0 is different from what we
+ * need here. However, LLVM also assumes that ffs(x) is
+ * in [0, 31], but GLSL expects that ffs(0) = -1, so
+ * a conditional assignment to handle 0 is still required.
+ *
+ * The hardware already implements the correct behavior.
+ */
+ ctx->i1true,
+ };
+
+ LLVMValueRef lsb = build_intrinsic(ctx, intrin_name, type, params, 2, FUNC_ATTR_READNONE);
+
+ if (src0_bitsize == 64) {
+ lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, "");
+ } else if (src0_bitsize < 32) {
+ lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, "");
+ }
+
+ /* TODO: We need an intrinsic to skip this conditional. */
+ /* Check for zero: */
+ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0, zero, ""),
+ LLVMConstInt(ctx->i32, -1, 0), lsb, "");
+}
+
+LLVMValueRef build_image_get_sample_count(struct libresoc_llvm_context *ctx, LLVMValueRef rsrc)
+{
+ LLVMValueRef samples;
+
+ /* Read the samples from the descriptor directly.
+ * Hardware doesn't have any instruction for this.
+ */
+ samples = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 3, 0), "");
+ samples = LLVMBuildLShr(ctx->builder, samples, LLVMConstInt(ctx->i32, 16, 0), "");
+ samples = LLVMBuildAnd(ctx->builder, samples, LLVMConstInt(ctx->i32, 0xf, 0), "");
+ samples = LLVMBuildShl(ctx->builder, ctx->i32_1, samples, "");
+ return samples;
+}
+
+LLVMValueRef build_cvt_pkrtz_f16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2])
+{
+ return build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", ctx->v2f16, args, 2,
+ FUNC_ATTR_READNONE);
+}
+
+LLVMValueRef build_cvt_pknorm_i16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2])
+{
+ LLVMValueRef res = build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16", ctx->v2i16, args, 2,
+ FUNC_ATTR_READNONE);
+ return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+}
+
+LLVMValueRef build_cvt_pknorm_u16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2])
+{
+ LLVMValueRef res = build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16", ctx->v2i16, args, 2,
+ FUNC_ATTR_READNONE);
+ return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+}
+
+/* The 8-bit and 10-bit clamping is for HW workarounds. */
+LLVMValueRef build_cvt_pk_i16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2], unsigned bits,
+ bool hi)
+{
+ assert(bits == 8 || bits == 10 || bits == 16);
+
+ LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
+ LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
+ LLVMValueRef max_alpha = bits != 10 ? max_rgb : ctx->i32_1;
+ LLVMValueRef min_alpha = bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
+
+ /* Clamp. */
+ if (bits != 16) {
+ for (int i = 0; i < 2; i++) {
+ bool alpha = hi && i == 1;
+ args[i] = build_imin(ctx, args[i], alpha ? max_alpha : max_rgb);
+ args[i] = build_imax(ctx, args[i], alpha ? min_alpha : min_rgb);
+ }
+ }
+
+ LLVMValueRef res =
+ build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", ctx->v2i16, args, 2, FUNC_ATTR_READNONE);
+ return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+}
+
+/* The 8-bit and 10-bit clamping is for HW workarounds. */
+LLVMValueRef build_cvt_pk_u16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2], unsigned bits,
+ bool hi)
+{
+ assert(bits == 8 || bits == 10 || bits == 16);
+
+ LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
+ LLVMValueRef max_alpha = bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
+
+ /* Clamp. */
+ if (bits != 16) {
+ for (int i = 0; i < 2; i++) {
+ bool alpha = hi && i == 1;
+ args[i] = build_umin(ctx, args[i], alpha ? max_alpha : max_rgb);
+ }
+ }
+
+ LLVMValueRef res =
+ build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", ctx->v2i16, args, 2, FUNC_ATTR_READNONE);
+ return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+}
+
+LLVMValueRef build_wqm_vote(struct libresoc_llvm_context *ctx, LLVMValueRef i1)
+{
+ return build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, &i1, 1, FUNC_ATTR_READNONE);
+}
+
+void build_kill_if_false(struct libresoc_llvm_context *ctx, LLVMValueRef i1)
+{
+ build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, &i1, 1, 0);
+}