From: Vivek Pandya Date: Sat, 19 Dec 2020 10:57:08 +0000 (+0530) Subject: At this commit driver is able to generate broken LLVM IR X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=09e8b53875cb0e77decabbd3e5325549fa3d94fd;p=mesa.git At this commit driver is able to generate broken LLVM IR for a vulkan demo exmple. --- diff --git a/src/libre-soc/vulkan/libresoc_llvm.c b/src/libre-soc/vulkan/libresoc_llvm.c index 77248c8d530..bc17e979b16 100644 --- a/src/libre-soc/vulkan/libresoc_llvm.c +++ b/src/libre-soc/vulkan/libresoc_llvm.c @@ -1,9 +1,36 @@ #include "libresoc_llvm.h" +#include "libresoc_shader_args.h" +#include "libresoc_llvm_build.h" #include #include #include #include "nir/nir.h" #include "nir/nir_deref.h" +#include + +struct libresoc_nir_tran_ctx { + struct libresoc_llvm_context lc; + gl_shader_stage stage; + shader_info *info; + + struct shader_args args; + LLVMValueRef *ssa_defs; + + LLVMValueRef scratch; + LLVMValueRef constant_data; + + struct hash_table *defs; + struct hash_table *phis; + struct hash_table *vars; + struct hash_table *verified_interp; + + LLVMValueRef main_function; + LLVMBasicBlockRef continue_block; + LLVMBasicBlockRef break_block; + + int num_locals; + LLVMValueRef *locals; +}; void InitLLVM(struct libresoc_llvm *llvm_ref) { @@ -29,8 +56,53 @@ void InitLLVM(struct libresoc_llvm *llvm_ref) //assert(tm_ref); LLVMDisposeErrorMessage(def_triple); llvm_ref->orc_ref = LLVMOrcCreateInstance(tm_ref); - llvm_ref->context = LLVMContextCreate(); - llvm_ref->builder = LLVMCreateBuilderInContext(llvm_ref->context); + llvm_ref->lc.context = LLVMContextCreate(); + llvm_ref->lc.builder = LLVMCreateBuilderInContext(llvm_ref->lc.context); + llvm_ref->lc.voidt = LLVMVoidTypeInContext(llvm_ref->lc.context); + llvm_ref->lc.i1 = LLVMInt1TypeInContext(llvm_ref->lc.context); + llvm_ref->lc.i8 = LLVMInt8TypeInContext(llvm_ref->lc.context); + llvm_ref->lc.i16 = LLVMIntTypeInContext(llvm_ref->lc.context, 16); + llvm_ref->lc.i32 = 
LLVMIntTypeInContext(llvm_ref->lc.context, 32); + llvm_ref->lc.i64 = LLVMIntTypeInContext(llvm_ref->lc.context, 64); + llvm_ref->lc.i128 = LLVMIntTypeInContext(llvm_ref->lc.context, 128); + llvm_ref->lc.intptr = llvm_ref->lc.i32; + llvm_ref->lc.f16 = LLVMHalfTypeInContext(llvm_ref->lc.context); + llvm_ref->lc.f32 = LLVMFloatTypeInContext(llvm_ref->lc.context); + llvm_ref->lc.f64 = LLVMDoubleTypeInContext(llvm_ref->lc.context); + llvm_ref->lc.v2i16 = LLVMVectorType(llvm_ref->lc.i16, 2); + llvm_ref->lc.v4i16 = LLVMVectorType(llvm_ref->lc.i16, 4); + llvm_ref->lc.v2f16 = LLVMVectorType(llvm_ref->lc.f16, 2); + llvm_ref->lc.v4f16 = LLVMVectorType(llvm_ref->lc.f16, 4); + llvm_ref->lc.v2i32 = LLVMVectorType(llvm_ref->lc.i32, 2); + llvm_ref->lc.v3i32 = LLVMVectorType(llvm_ref->lc.i32, 3); + llvm_ref->lc.v4i32 = LLVMVectorType(llvm_ref->lc.i32, 4); + llvm_ref->lc.v2f32 = LLVMVectorType(llvm_ref->lc.f32, 2); + llvm_ref->lc.v3f32 = LLVMVectorType(llvm_ref->lc.f32, 3); + llvm_ref->lc.v4f32 = LLVMVectorType(llvm_ref->lc.f32, 4); + llvm_ref->lc.v8i32 = LLVMVectorType(llvm_ref->lc.i32, 8); + // llvm_ref->lc.iN_wavemask = LLVMIntTypeInContext(llvm_ref->lc.context, llvm_ref->lc.wave_size); + // llvm_ref->lc.iN_ballotmask = LLVMIntTypeInContext(llvm_ref->lc.context, ballot_mask_bits); + + llvm_ref->lc.i8_0 = LLVMConstInt(llvm_ref->lc.i8, 0, false); + llvm_ref->lc.i8_1 = LLVMConstInt(llvm_ref->lc.i8, 1, false); + llvm_ref->lc.i16_0 = LLVMConstInt(llvm_ref->lc.i16, 0, false); + llvm_ref->lc.i16_1 = LLVMConstInt(llvm_ref->lc.i16, 1, false); + llvm_ref->lc.i32_0 = LLVMConstInt(llvm_ref->lc.i32, 0, false); + llvm_ref->lc.i32_1 = LLVMConstInt(llvm_ref->lc.i32, 1, false); + llvm_ref->lc.i64_0 = LLVMConstInt(llvm_ref->lc.i64, 0, false); + llvm_ref->lc.i64_1 = LLVMConstInt(llvm_ref->lc.i64, 1, false); + llvm_ref->lc.i128_0 = LLVMConstInt(llvm_ref->lc.i128, 0, false); + llvm_ref->lc.i128_1 = LLVMConstInt(llvm_ref->lc.i128, 1, false); + llvm_ref->lc.f16_0 = LLVMConstReal(llvm_ref->lc.f16, 0.0); 
+ llvm_ref->lc.f16_1 = LLVMConstReal(llvm_ref->lc.f16, 1.0); + llvm_ref->lc.f32_0 = LLVMConstReal(llvm_ref->lc.f32, 0.0); + llvm_ref->lc.f32_1 = LLVMConstReal(llvm_ref->lc.f32, 1.0); + llvm_ref->lc.f64_0 = LLVMConstReal(llvm_ref->lc.f64, 0.0); + llvm_ref->lc.f64_1 = LLVMConstReal(llvm_ref->lc.f64, 1.0); + + llvm_ref->lc.i1false = LLVMConstInt(llvm_ref->lc.i1, 0, false); + llvm_ref->lc.i1true = LLVMConstInt(llvm_ref->lc.i1, 1, false); + llvm_ref->lc.float_mode = 0; //TODO: default value, when required take this value as parameter } void DestroyLLVM(struct libresoc_llvm *llvm_ref) @@ -46,25 +118,2355 @@ static uint64_t orc_sym_resolver(const char *name, void *ctx) return (uint64_t)address; } -void libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_shader *nir) +static LLVMTypeRef arg_llvm_type(enum arg_type type, unsigned size, struct libresoc_llvm_context *ctx) +{ + if (type == ARG_FLOAT) { + return size == 1 ? ctx->f32 : LLVMVectorType(ctx->f32, size); + } else if (type == ARG_INT) { + return size == 1 ? 
ctx->i32 : LLVMVectorType(ctx->i32, size); + } else { + LLVMTypeRef ptr_type; + switch (type) { + case ARG_CONST_PTR: + ptr_type = ctx->i8; + break; + case ARG_CONST_FLOAT_PTR: + ptr_type = ctx->f32; + break; + case ARG_CONST_PTR_PTR: + ptr_type = LLVMPointerType(ctx->i8, 0); + break; + case ARG_CONST_DESC_PTR: + ptr_type = ctx->v4i32; + break; + case ARG_CONST_IMAGE_PTR: + ptr_type = ctx->v8i32; + break; + default: + unreachable("unknown arg type"); + } + if (size == 1) { + //return ac_array_in_const32_addr_space(ptr_type); + return LLVMPointerType(ptr_type, 0); //address space may be wrong + } else { + assert(size == 2); + return LLVMPointerType(ptr_type, 0); + } + } +} +static LLVMValueRef get_src(struct libresoc_nir_tran_ctx *ctx, nir_src src) +{ + assert(src.is_ssa); + // printf("index %d\n", src.ssa->index); + return ctx->ssa_defs[src.ssa->index]; +} + +static uint32_t widen_mask(uint32_t mask, unsigned multiplier) +{ + uint32_t new_mask = 0; + for (unsigned i = 0; i < 32 && (1u << i) <= mask; ++i) + if (mask & (1u << i)) + new_mask |= ((1u << multiplier) - 1u) << (i * multiplier); + return new_mask; +} + +static void get_deref_offset(struct libresoc_nir_tran_ctx *ctx, nir_deref_instr *instr, bool vs_in, + unsigned *vertex_index_out, LLVMValueRef *vertex_index_ref, + unsigned *const_out, LLVMValueRef *indir_out) +{ + nir_variable *var = nir_deref_instr_get_variable(instr); + nir_deref_path path; + unsigned idx_lvl = 1; + + nir_deref_path_init(&path, instr, NULL); + + if (vertex_index_out != NULL || vertex_index_ref != NULL) { + if (vertex_index_ref) { + *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index); + if (vertex_index_out) + *vertex_index_out = 0; + } else { + *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index); + } + ++idx_lvl; + } + + uint32_t const_offset = 0; + LLVMValueRef offset = NULL; + + if (var->data.compact) { + assert(instr->deref_type == nir_deref_type_array); + const_offset = nir_src_as_uint(instr->arr.index); + 
goto out; + } + + for (; path.path[idx_lvl]; ++idx_lvl) { + const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type; + if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) { + unsigned index = path.path[idx_lvl]->strct.index; + + for (unsigned i = 0; i < index; i++) { + const struct glsl_type *ft = glsl_get_struct_field(parent_type, i); + const_offset += glsl_count_attribute_slots(ft, vs_in); + } + } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) { + unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in); + if (nir_src_is_const(path.path[idx_lvl]->arr.index)) { + const_offset += size * nir_src_as_uint(path.path[idx_lvl]->arr.index); + } else { + LLVMValueRef array_off = + LLVMBuildMul(ctx->lc.builder, LLVMConstInt(ctx->lc.i32, size, 0), + get_src(ctx, path.path[idx_lvl]->arr.index), ""); + if (offset) + offset = LLVMBuildAdd(ctx->lc.builder, offset, array_off, ""); + else + offset = array_off; + } + } else + unreachable("Uhandled deref type in get_deref_instr_offset"); + } + +out: + nir_deref_path_finish(&path); + + if (const_offset && offset) + offset = + LLVMBuildAdd(ctx->lc.builder, offset, LLVMConstInt(ctx->lc.i32, const_offset, 0), ""); + + *const_out = const_offset; + *indir_out = offset; +} + +static unsigned type_scalar_size_bytes(const struct glsl_type *type) +{ + assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type)); + return glsl_type_is_boolean(type) ? 
4 : glsl_get_bit_size(type) / 8;
+}
+
+
+/* Build an integer comparison whose result is widened from i1 to an i32
+ * all-ones (0xFFFFFFFF) / all-zeros mask via a select.  If exactly one
+ * operand is a pointer, the other is converted with inttoptr first so the
+ * icmp operands have matching types. */
+static LLVMValueRef emit_int_cmp(struct libresoc_llvm_context *lc, LLVMIntPredicate pred,
+                                 LLVMValueRef src0, LLVMValueRef src1)
+{
+   LLVMTypeRef src0_type = LLVMTypeOf(src0);
+   LLVMTypeRef src1_type = LLVMTypeOf(src1);
+
+   if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind &&
+       LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
+      src1 = LLVMBuildIntToPtr(lc->builder, src1, src0_type, "");
+   } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
+              LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) {
+      src0 = LLVMBuildIntToPtr(lc->builder, src0, src1_type, "");
+   }
+
+   LLVMValueRef result = LLVMBuildICmp(lc->builder, pred, src0, src1, "");
+   return LLVMBuildSelect(lc->builder, result, LLVMConstInt(lc->i32, 0xFFFFFFFF, false),
+                          lc->i32_0, "");
+}
+
+/* Float analogue of emit_int_cmp: both operands are bitcast to float form
+ * (to_float) before the fcmp, and the i1 result is widened to an i32
+ * 0xFFFFFFFF / 0 mask. */
+static LLVMValueRef emit_float_cmp(struct libresoc_llvm_context *lc, LLVMRealPredicate pred,
+                                   LLVMValueRef src0, LLVMValueRef src1)
+{
+   LLVMValueRef result;
+   src0 = to_float(lc, src0);
+   src1 = to_float(lc, src1);
+   result = LLVMBuildFCmp(lc->builder, pred, src0, src1, "");
+   return LLVMBuildSelect(lc->builder, result, LLVMConstInt(lc->i32, 0xFFFFFFFF, false),
+                          lc->i32_0, "");
+}
+
+/* Emit a call to a 1-operand float intrinsic.  The full intrinsic name is
+ * "<intrin>.<type-suffix>" where the suffix is derived from the (floated)
+ * operand type by build_type_name_for_intr; the call is marked readnone. */
+static LLVMValueRef emit_intrin_1f_param(struct libresoc_llvm_context *lc, const char *intrin,
+                                         LLVMTypeRef result_type, LLVMValueRef src0)
+{
+   char name[64], type[64];
+   LLVMValueRef params[] = {
+      to_float(lc, src0),
+   };
+
+   build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+   ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
+   assert(length < sizeof(name));
+   return build_intrinsic(lc, name, result_type, params, 1, FUNC_ATTR_READNONE);
+}
+
+/* Like emit_intrin_1f_param, but when result_type is a vector the intrinsic
+ * is emitted once per element and the results reassembled with
+ * insertelement (for intrinsics that only exist in scalar form). */
+static LLVMValueRef emit_intrin_1f_param_scalar(struct libresoc_llvm_context *lc, const char *intrin,
+                                                LLVMTypeRef result_type, LLVMValueRef src0)
+{
+   if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
+      return emit_intrin_1f_param(lc, intrin, result_type, src0);
+
+   LLVMTypeRef
elem_type = LLVMGetElementType(result_type); + LLVMValueRef ret = LLVMGetUndef(result_type); + + /* Scalarize the intrinsic, because vectors are not supported. */ + for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) { + char name[64], type[64]; + LLVMValueRef params[] = { + to_float(lc, llvm_extract_elem(lc, src0, i)), + }; + + build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); + assert(length < sizeof(name)); + ret = LLVMBuildInsertElement( + lc->builder, ret, + build_intrinsic(lc, name, elem_type, params, 1, FUNC_ATTR_READNONE), + LLVMConstInt(lc->i32, i, 0), ""); + } + return ret; +} + +static LLVMValueRef emit_intrin_2f_param(struct libresoc_llvm_context *ctx, const char *intrin, + LLVMTypeRef result_type, LLVMValueRef src0, + LLVMValueRef src1) +{ + char name[64], type[64]; + LLVMValueRef params[] = { + to_float(ctx, src0), + to_float(ctx, src1), + }; + + build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); + assert(length < sizeof(name)); + return build_intrinsic(ctx, name, result_type, params, 2, FUNC_ATTR_READNONE); +} + +static LLVMValueRef emit_intrin_3f_param(struct libresoc_llvm_context *ctx, const char *intrin, + LLVMTypeRef result_type, LLVMValueRef src0, + LLVMValueRef src1, LLVMValueRef src2) +{ + char name[64], type[64]; + LLVMValueRef params[] = { + to_float(ctx, src0), + to_float(ctx, src1), + to_float(ctx, src2), + }; + + build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); + assert(length < sizeof(name)); + return build_intrinsic(ctx, name, result_type, params, 3, FUNC_ATTR_READNONE); +} + +static LLVMValueRef emit_bcsel(struct libresoc_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1, + LLVMValueRef src2) +{ + LLVMTypeRef src1_type 
= LLVMTypeOf(src1);
+   LLVMTypeRef src2_type = LLVMTypeOf(src2);
+
+   /* As in emit_int_cmp: if exactly one select arm is a pointer, convert
+    * the other with inttoptr so both arms have the same type. */
+   if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
+       LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
+      src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
+   } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
+              LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
+      src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
+   }
+
+   /* The condition src0 is any non-zero value, so compare against the
+    * zero value of its own type rather than assuming i1. */
+   LLVMValueRef v =
+      LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, LLVMConstNull(LLVMTypeOf(src0)), "");
+   return LLVMBuildSelect(ctx->builder, v, to_integer_or_pointer(ctx, src1),
+                          to_integer_or_pointer(ctx, src2), "");
+}
+
+/* Integer absolute value: max(x, -x). */
+static LLVMValueRef emit_iabs(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
+{
+   return build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
+}
+
+/* Emit an overflow-style intrinsic (named by 'intrin') returning the
+ * aggregate {i32, i1}; extract the i1 carry/borrow flag (field 1) and
+ * zero-extend it to i32. */
+static LLVMValueRef emit_uint_carry(struct libresoc_llvm_context *ctx, const char *intrin,
+                                    LLVMValueRef src0, LLVMValueRef src1)
+{
+   LLVMTypeRef ret_type;
+   LLVMTypeRef types[] = {ctx->i32, ctx->i1};
+   LLVMValueRef res;
+   LLVMValueRef params[] = {src0, src1};
+   ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true);
+
+   res = build_intrinsic(ctx, intrin, ret_type, params, 2, FUNC_ATTR_READNONE);
+
+   res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
+   res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
+   return res;
+}
+
+/* Convert a 32-bit boolean mask (0 / 0xFFFFFFFF) to float 0.0 / 1.0 of the
+ * requested bit size: AND with 0x3f800000 (the IEEE-754 bit pattern of
+ * 1.0f) yields exactly 0.0f or 1.0f after the bitcast, then truncate or
+ * extend to f16/f32/f64 as needed. */
+static LLVMValueRef emit_b2f(struct libresoc_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
+{
+   assert(get_elem_bits(ctx, LLVMTypeOf(src0)) == 32);
+   LLVMValueRef result =
+      LLVMBuildAnd(ctx->builder, src0, const_uint_vec(ctx, LLVMTypeOf(src0), 0x3f800000), "");
+   result = to_float(ctx, result);
+
+   switch (bitsize) {
+   case 16: {
+      bool vec2 = LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind;
+      return LLVMBuildFPTrunc(ctx->builder, result, vec2 ?
ctx->v2f16 : ctx->f16, ""); + } + case 32: + return result; + case 64: + return LLVMBuildFPExt(ctx->builder, result, ctx->f64, ""); + default: + unreachable("Unsupported bit size."); + } +} + +static LLVMValueRef emit_f2b(struct libresoc_llvm_context *ctx, LLVMValueRef src0) +{ + src0 = to_float(ctx, src0); + LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0)); + return LLVMBuildSExt(ctx->builder, LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""), + ctx->i32, ""); +} + +static LLVMValueRef emit_b2i(struct libresoc_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) { - LLVMModuleRef mod = LLVMModuleCreateWithName("libresoc_mod"); - LLVMTypeRef param_types[] = { LLVMInt32Type(), LLVMInt32Type() }; - LLVMTypeRef ret_type = LLVMFunctionType(LLVMInt32Type(), param_types, 2, 0); - LLVMValueRef sum = LLVMAddFunction(mod, "sum", ret_type); - LLVMBasicBlockRef entry = LLVMAppendBasicBlock(sum, "entry"); - LLVMBuilderRef builder = LLVMCreateBuilder(); - LLVMPositionBuilderAtEnd(builder, entry); - LLVMValueRef tmp = LLVMBuildAdd(builder, LLVMGetParam(sum, 0), LLVMGetParam(sum, 1), "tmp"); - LLVMBuildRet(builder, tmp); + LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, ""); + + switch (bitsize) { + case 8: + return LLVMBuildTrunc(ctx->builder, result, ctx->i8, ""); + case 16: + return LLVMBuildTrunc(ctx->builder, result, ctx->i16, ""); + case 32: + return result; + case 64: + return LLVMBuildZExt(ctx->builder, result, ctx->i64, ""); + default: + unreachable("Unsupported bit size."); + } +} + +static LLVMValueRef emit_i2b(struct libresoc_llvm_context *ctx, LLVMValueRef src0) +{ + LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0)); + return LLVMBuildSExt(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""), + ctx->i32, ""); +} + +static LLVMValueRef emit_f2f16(struct libresoc_llvm_context *ctx, LLVMValueRef src0) +{ + LLVMValueRef result; + LLVMValueRef cond = NULL; + + src0 = to_float(ctx, src0); + result = 
LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
+
+   /* need to convert back up to f32 */
+   result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
+   return result;
+}
+
+/* High 32 bits of an unsigned 32x32->64 multiply: zero-extend both
+ * operands to i64, multiply, shift right by 32, truncate back to i32. */
+static LLVMValueRef emit_umul_high(struct libresoc_llvm_context *ctx, LLVMValueRef src0,
+                                   LLVMValueRef src1)
+{
+   LLVMValueRef dst64, result;
+   src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
+   src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
+
+   dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
+   dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
+   result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
+   return result;
+}
+
+/* Signed variant of emit_umul_high: sign-extend the operands and use an
+ * arithmetic shift for the upper half. */
+static LLVMValueRef emit_imul_high(struct libresoc_llvm_context *ctx, LLVMValueRef src0,
+                                   LLVMValueRef src1)
+{
+   LLVMValueRef dst64, result;
+   src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
+   src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
+
+   dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
+   dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
+   result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
+   return result;
+}
+
+/* Bitfield mask: a run of 'bits' one-bits starting at 'offset'. */
+static LLVMValueRef emit_bfm(struct libresoc_llvm_context *ctx, LLVMValueRef bits, LLVMValueRef offset)
+{
+   /* mask = ((1 << bits) - 1) << offset */
+   return LLVMBuildShl(
+      ctx->builder,
+      LLVMBuildSub(ctx->builder, LLVMBuildShl(ctx->builder, ctx->i32_1, bits, ""), ctx->i32_1, ""),
+      offset, ""),
+ */ + return LLVMBuildXor( + ctx->builder, base, + LLVMBuildAnd(ctx->builder, mask, LLVMBuildXor(ctx->builder, insert, base, ""), ""), ""); +} + +static LLVMValueRef emit_pack_2x16(struct libresoc_llvm_context *ctx, LLVMValueRef src0, + LLVMValueRef (*pack)(struct libresoc_llvm_context *ctx, + LLVMValueRef args[2])) +{ + LLVMValueRef comp[2]; + + src0 = to_float(ctx, src0); + comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, ""); + comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, ""); + + return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, ""); +} + +static LLVMValueRef emit_unpack_half_2x16(struct libresoc_llvm_context *ctx, LLVMValueRef src0) +{ + LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); + LLVMValueRef temps[2], val; + int i; + + for (i = 0; i < 2; i++) { + val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0; + val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, ""); + val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, ""); + temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, ""); + } + return build_gather_values(ctx, temps, 2); +} + +// TODO: enable this whn ac_builddxy() is added +// static LLVMValueRef emit_ddxy(struct libresoc_nir_context *ctx, nir_op op, LLVMValueRef src0) +// { +// unsigned mask; +// int idx; +// LLVMValueRef result; + +// if (op == nir_op_fddx_fine) +// mask = TID_MASK_LEFT; +// else if (op == nir_op_fddy_fine) +// mask = TID_MASK_TOP; +// else +// mask = TID_MASK_TOP_LEFT; + +// /* for DDX we want to next X pixel, DDY next Y pixel. 
*/
+// if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx)
+// idx = 1;
+// else
+// idx = 2;
+
+// result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
+// return result;
+// }
+
+/* Assign driver locations to the shader's function-temp variables and
+ * allocate their backing storage: one f32 alloca per channel, four
+ * channels per attribute slot (hence the *4 everywhere).  On malloc
+ * failure ctx->locals is left NULL and the function returns early. */
+static void setup_locals(struct libresoc_nir_tran_ctx *ctx, struct nir_function *func)
+{
+   int i, j;
+   ctx->num_locals = 0;
+   nir_foreach_function_temp_variable(variable, func->impl)
+   {
+      unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+      variable->data.driver_location = ctx->num_locals * 4;
+      variable->data.location_frac = 0;
+      ctx->num_locals += attrib_count;
+   }
+   ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
+   if (!ctx->locals)
+      return;
+
+   for (i = 0; i < ctx->num_locals; i++) {
+      for (j = 0; j < 4; j++) {
+         ctx->locals[i * 4 + j] = build_alloca_undef(&ctx->lc, ctx->lc.f32, "temp");
+      }
+   }
+}
+
+/* Allocate the shader's scratch space as one i8 array alloca; nothing to
+ * do when the shader declares no scratch. */
+static void setup_scratch(struct libresoc_nir_tran_ctx *ctx, struct nir_shader *shader)
+{
+   if (shader->scratch_size == 0)
+      return;
+
+   ctx->scratch =
+      build_alloca_undef(&ctx->lc, LLVMArrayType(ctx->lc.i8, shader->scratch_size), "scratch");
+}
+
+/* Materialize the shader's constant data blob as a hidden, constant i8
+ * array global named "const_data" and remember it in ctx->constant_data. */
+static void setup_constant_data(struct libresoc_nir_tran_ctx *ctx, struct nir_shader *shader)
+{
+   if (!shader->constant_data)
+      return;
+
+   LLVMValueRef data = LLVMConstStringInContext(ctx->lc.context, shader->constant_data,
+                                                shader->constant_data_size, true);
+   LLVMTypeRef type = LLVMArrayType(ctx->lc.i8, shader->constant_data_size);
+
+   unsigned address_space = 0; //TODO: dummy value
+   LLVMValueRef global =
+      LLVMAddGlobalInAddressSpace(*(ctx->lc.module), type, "const_data", address_space);
+
+   LLVMSetInitializer(global, data);
+   LLVMSetGlobalConstant(global, true);
+   LLVMSetVisibility(global, LLVMHiddenVisibility);
+   ctx->constant_data = global;
+}
+
+/* Map a GLSL base type to the matching LLVM scalar type; booleans and
+ * subroutines are represented as i32. */
+static LLVMTypeRef glsl_base_to_llvm_type(struct libresoc_llvm_context *lc, enum glsl_base_type type)
+{
+   switch (type) {
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_SUBROUTINE:
+ return lc->i32; + case GLSL_TYPE_INT8: + case GLSL_TYPE_UINT8: + return lc->i8; + case GLSL_TYPE_INT16: + case GLSL_TYPE_UINT16: + return lc->i16; + case GLSL_TYPE_FLOAT: + return lc->f32; + case GLSL_TYPE_FLOAT16: + return lc->f16; + case GLSL_TYPE_INT64: + case GLSL_TYPE_UINT64: + return lc->i64; + case GLSL_TYPE_DOUBLE: + return lc->f64; + default: + unreachable("unknown GLSL type"); + } +} + +static LLVMTypeRef glsl_to_llvm_type(struct libresoc_llvm_context *lc, const struct glsl_type *type) +{ + if (glsl_type_is_scalar(type)) { + return glsl_base_to_llvm_type(lc, glsl_get_base_type(type)); + } + + if (glsl_type_is_vector(type)) { + return LLVMVectorType(glsl_base_to_llvm_type(lc, glsl_get_base_type(type)), + glsl_get_vector_elements(type)); + } + + if (glsl_type_is_matrix(type)) { + return LLVMArrayType(glsl_to_llvm_type(lc, glsl_get_column_type(type)), + glsl_get_matrix_columns(type)); + } + + if (glsl_type_is_array(type)) { + return LLVMArrayType(glsl_to_llvm_type(lc, glsl_get_array_element(type)), + glsl_get_length(type)); + } + + assert(glsl_type_is_struct_or_ifc(type)); + + LLVMTypeRef member_types[glsl_get_length(type)]; + + for (unsigned i = 0; i < glsl_get_length(type); i++) { + member_types[i] = glsl_to_llvm_type(lc, glsl_get_struct_field(type, i)); + } + + return LLVMStructTypeInContext(lc->context, member_types, glsl_get_length(type), false); +} + +static void visit_load_const(struct libresoc_nir_tran_ctx *ctx, const nir_load_const_instr *instr) +{ + LLVMValueRef values[4], value = NULL; + LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->lc.context, instr->def.bit_size); + + for (unsigned i = 0; i < instr->def.num_components; ++i) { + switch (instr->def.bit_size) { + case 8: + values[i] = LLVMConstInt(element_type, instr->value[i].u8, false); + break; + case 16: + values[i] = LLVMConstInt(element_type, instr->value[i].u16, false); + break; + case 32: + values[i] = LLVMConstInt(element_type, instr->value[i].u32, false); + break; + case 64: + 
values[i] = LLVMConstInt(element_type, instr->value[i].u64, false); + break; + default: + fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size); + abort(); + } + } + if (instr->def.num_components > 1) { + value = LLVMConstVector(values, instr->def.num_components); + } else + value = values[0]; + + ctx->ssa_defs[instr->def.index] = value; +} + +static void visit_deref(struct libresoc_nir_tran_ctx *ctx, nir_deref_instr *instr) +{ + if (instr->mode != nir_var_mem_shared && instr->mode != nir_var_mem_global) + return; + + LLVMValueRef result = NULL; + switch (instr->deref_type) { + case nir_deref_type_var: { + struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var); + result = entry->data; + break; + } + case nir_deref_type_struct: + if (instr->mode == nir_var_mem_global) { + nir_deref_instr *parent = nir_deref_instr_parent(instr); + uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index); + result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), + LLVMConstInt(ctx->lc.i32, offset, 0)); + } else { + result = build_gep0(&ctx->lc, get_src(ctx, instr->parent), + LLVMConstInt(ctx->lc.i32, instr->strct.index, 0)); + } + break; + case nir_deref_type_array: + if (instr->mode == nir_var_mem_global) { + nir_deref_instr *parent = nir_deref_instr_parent(instr); + unsigned stride = glsl_get_explicit_stride(parent->type); + + if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) || + (glsl_type_is_vector(parent->type) && stride == 0)) + stride = type_scalar_size_bytes(parent->type); + + assert(stride > 0); + LLVMValueRef index = get_src(ctx, instr->arr.index); + if (LLVMTypeOf(index) != ctx->lc.i64) + index = LLVMBuildZExt(ctx->lc.builder, index, ctx->lc.i64, ""); + + LLVMValueRef offset = + LLVMBuildMul(ctx->lc.builder, index, LLVMConstInt(ctx->lc.i64, stride, 0), ""); + + result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), offset); + } else { + result = + 
build_gep0(&ctx->lc, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index)); + } + break; + case nir_deref_type_ptr_as_array: + if (instr->mode == nir_var_mem_global) { + unsigned stride = nir_deref_instr_array_stride(instr); + + LLVMValueRef index = get_src(ctx, instr->arr.index); + if (LLVMTypeOf(index) != ctx->lc.i64) + index = LLVMBuildZExt(ctx->lc.builder, index, ctx->lc.i64, ""); + + LLVMValueRef offset = + LLVMBuildMul(ctx->lc.builder, index, LLVMConstInt(ctx->lc.i64, stride, 0), ""); + + result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), offset); + } else { + result = + build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index)); + } + break; + case nir_deref_type_cast: { + result = get_src(ctx, instr->parent); + + /* We can't use the structs from LLVM because the shader + * specifies its own offsets. */ + LLVMTypeRef pointee_type = ctx->lc.i8; + if (instr->mode == nir_var_mem_shared) + pointee_type = glsl_to_llvm_type(&ctx->lc, instr->type); + + unsigned address_space; + + switch (instr->mode) { + case nir_var_mem_shared: + address_space = 1; + break; + case nir_var_mem_global: + address_space = 0; + break; + default: + unreachable("Unhandled address space"); + } + + LLVMTypeRef type = LLVMPointerType(pointee_type, address_space); + + if (LLVMTypeOf(result) != type) { + if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) { + result = LLVMBuildBitCast(ctx->lc.builder, result, type, ""); + } else { + result = LLVMBuildIntToPtr(ctx->lc.builder, result, type, ""); + } + } + break; + } + default: + unreachable("Unhandled deref_instr deref type"); + } + + ctx->ssa_defs[instr->dest.ssa.index] = result; +} + +static LLVMTypeRef get_def_type(struct libresoc_nir_tran_ctx *ctx, const nir_ssa_def *def) +{ + LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, def->bit_size); + if (def->num_components > 1) { + type = LLVMVectorType(type, def->num_components); + } + return type; +} + +static void visit_phi(struct 
libresoc_nir_tran_ctx *ctx, nir_phi_instr *instr) +{ + LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa); + LLVMValueRef result = LLVMBuildPhi(ctx->lc.builder, type, ""); + + ctx->ssa_defs[instr->dest.ssa.index] = result; + _mesa_hash_table_insert(ctx->phis, instr, result); +} + +static bool is_def_used_in_an_export(const nir_ssa_def *def) +{ + nir_foreach_use (use_src, def) { + if (use_src->parent_instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr); + if (instr->intrinsic == nir_intrinsic_store_deref) + return true; + } else if (use_src->parent_instr->type == nir_instr_type_alu) { + nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr); + if (instr->op == nir_op_vec4 && is_def_used_in_an_export(&instr->dest.dest.ssa)) { + return true; + } + } + } + return false; +} + +static void visit_ssa_undef(struct libresoc_nir_tran_ctx *ctx, const nir_ssa_undef_instr *instr) +{ + unsigned num_components = instr->def.num_components; + LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, instr->def.bit_size); + + if (/*!ctx->abi->convert_undef_to_zero ||*/ is_def_used_in_an_export(&instr->def)) { + LLVMValueRef undef; + + if (num_components == 1) + undef = LLVMGetUndef(type); + else { + undef = LLVMGetUndef(LLVMVectorType(type, num_components)); + } + ctx->ssa_defs[instr->def.index] = undef; + } else { + LLVMValueRef zero = LLVMConstInt(type, 0, false); + if (num_components > 1) { + zero = build_gather_values_extended(&ctx->lc, &zero, 4, 0, false, false); + } + ctx->ssa_defs[instr->def.index] = zero; + } +} + +static void visit_jump(struct libresoc_llvm_context *lc, const nir_jump_instr *instr) +{ + switch (instr->type) { + case nir_jump_break: + build_break(lc); + break; + case nir_jump_continue: + build_continue(lc); + break; + default: + fprintf(stderr, "Unknown NIR jump instr: "); + nir_print_instr(&instr->instr, stderr); + fprintf(stderr, "\n"); + abort(); + } +} + +static LLVMValueRef 
get_alu_src(struct libresoc_nir_tran_ctx *ctx, nir_alu_src src,
            unsigned num_components)
{
   /* Fetch the LLVM value backing a NIR ALU source and apply its swizzle /
    * component-count adjustment so the returned value has num_components
    * lanes. */
   LLVMValueRef value = get_src(ctx, src.src);
   bool need_swizzle = false;

   assert(value);
   unsigned src_components = get_llvm_num_components(value);
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      /* NIR swizzles are at most 4 components wide. */
      LLVMValueRef masks[] = {LLVMConstInt(ctx->lc.i32, src.swizzle[0], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[1], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[2], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[3], false)};

      if (src_components > 1 && num_components == 1) {
         /* vector -> scalar: extract the one swizzled lane */
         value = LLVMBuildExtractElement(ctx->lc.builder, value, masks[0], "");
      } else if (src_components == 1 && num_components > 1) {
         /* scalar -> vector: splat the scalar */
         LLVMValueRef values[] = {value, value, value, value};
         value = build_gather_values(&ctx->lc, values, num_components);
      } else {
         /* general case: shufflevector with the swizzle as the mask */
         LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
         value = LLVMBuildShuffleVector(ctx->lc.builder, value, value, swizzle, "");
      }
   }
   /* Source modifiers are expected to have been lowered out of the NIR
    * before translation. */
   assert(!src.negate);
   assert(!src.abs);
   return value;
}

/* Translate one NIR ALU instruction to LLVM IR.  On success the value is
 * stored into ctx->ssa_defs[] keyed by the destination SSA index.  Several
 * ops (frexp_*, fddx/fddy family, fpow) are still TODO: they leave
 * result == NULL, so no SSA def is recorded for them — later uses of that
 * def will read a stale/NULL entry.  NOTE(review): likely a source of the
 * broken IR mentioned in the commit message; confirm. */
static void visit_alu(struct libresoc_nir_tran_ctx *ctx, const nir_alu_instr *instr)
{
   LLVMValueRef src[4], result = NULL;
   unsigned num_components = instr->dest.dest.ssa.num_components;
   unsigned src_components;
   LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);

   assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
   /* Determine how many components each *source* of this op carries (may
    * differ from the destination's component count). */
   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
   case nir_op_pack_snorm_2x16:
   case nir_op_pack_unorm_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord:
   case nir_op_cube_face_index:
      src_components = 3;
      break;
   default:
      src_components = num_components;
      break;
   }
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      src[i] = get_alu_src(ctx, instr->src[i], src_components);

   switch (instr->op) {
   case nir_op_mov:
      result = src[0];
      break;
   case nir_op_fneg:
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFNeg(ctx->lc.builder, src[0], "");
      if (ctx->lc.float_mode == FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
         /* fneg will be optimized by backend compiler with sign
          * bit removed via XOR. This is probably a LLVM bug.
          */
         result = build_canonicalize(&ctx->lc, result, instr->dest.dest.ssa.bit_size);
      }
      break;
   case nir_op_ineg:
      result = LLVMBuildNeg(ctx->lc.builder, src[0], "");
      break;
   case nir_op_inot:
      result = LLVMBuildNot(ctx->lc.builder, src[0], "");
      break;
   case nir_op_iadd:
      result = LLVMBuildAdd(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_fadd:
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFAdd(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_fsub:
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFSub(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_isub:
      result = LLVMBuildSub(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_imul:
      result = LLVMBuildMul(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_imod:
      /* NOTE(review): srem is a remainder, not a mathematical modulo — the
       * two differ for mixed-sign operands.  Presumably NIR lowers imod
       * before this point; confirm, otherwise this is wrong. */
      result = LLVMBuildSRem(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_umod:
      result = LLVMBuildURem(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_irem:
      result = LLVMBuildSRem(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_idiv:
      result = LLVMBuildSDiv(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_udiv:
      result = LLVMBuildUDiv(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_fmul:
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFMul(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_frcp:
      /* For doubles, we need precise division to pass GLCTS. */
      if (ctx->lc.float_mode == FLOAT_MODE_DEFAULT_OPENGL && get_type_size(def_type) == 8) {
         result = LLVMBuildFDiv(ctx->lc.builder, ctx->lc.f64_1, to_float(&ctx->lc, src[0]), "");
      } else {
         result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.rcp",
                                              to_float_type(&ctx->lc, def_type), src[0]);
      }
      // TODO: abi not supported
      // if (ctx->abi->clamp_div_by_zero)
      //    result = build_fmin(&ctx->lc, result,
      //                        LLVMConstReal(to_float_type(&ctx->lc, def_type), FLT_MAX));
      break;
   case nir_op_iand:
      result = LLVMBuildAnd(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ior:
      result = LLVMBuildOr(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ixor:
      result = LLVMBuildXor(ctx->lc.builder, src[0], src[1], "");
      break;
   /* For the shifts, LLVM requires both operands to have the same width,
    * while NIR allows a 32-bit shift amount on any size value — so widen
    * or narrow the amount to match src[0] first. */
   case nir_op_ishl:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildShl(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ishr:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildAShr(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ushr:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildLShr(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ilt32:
      result = emit_int_cmp(&ctx->lc, LLVMIntSLT, src[0], src[1]);
      break;
   case nir_op_ine32:
      result = emit_int_cmp(&ctx->lc, LLVMIntNE, src[0], src[1]);
      break;
   case nir_op_ieq32:
      result = emit_int_cmp(&ctx->lc, LLVMIntEQ, src[0], src[1]);
      break;
   case nir_op_ige32:
      result = emit_int_cmp(&ctx->lc, LLVMIntSGE, src[0], src[1]);
      break;
   case nir_op_ult32:
      result = emit_int_cmp(&ctx->lc, LLVMIntULT, src[0], src[1]);
      break;
   case nir_op_uge32:
      result = emit_int_cmp(&ctx->lc, LLVMIntUGE, src[0], src[1]);
      break;
   case nir_op_feq32:
      result = emit_float_cmp(&ctx->lc, LLVMRealOEQ, src[0], src[1]);
      break;
   case nir_op_fneu32:
      result = emit_float_cmp(&ctx->lc, LLVMRealUNE, src[0], src[1]);
      break;
   case nir_op_flt32:
      result = emit_float_cmp(&ctx->lc, LLVMRealOLT, src[0], src[1]);
      break;
   case nir_op_fge32:
      result = emit_float_cmp(&ctx->lc, LLVMRealOGE, src[0], src[1]);
      break;
   case nir_op_fabs:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.fabs", to_float_type(&ctx->lc, def_type), src[0]);
      if (ctx->lc.float_mode == FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
         /* fabs will be optimized by backend compiler with sign
          * bit removed via AND.
          */
         result = build_canonicalize(&ctx->lc, result, instr->dest.dest.ssa.bit_size);
      }
      break;
   case nir_op_iabs:
      result = emit_iabs(&ctx->lc, src[0]);
      break;
   case nir_op_imax:
      result = build_imax(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_imin:
      result = build_imin(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_umax:
      result = build_umax(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_umin:
      result = build_umin(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_isign:
      result = build_isign(&ctx->lc, src[0]);
      break;
   case nir_op_fsign:
      src[0] = to_float(&ctx->lc, src[0]);
      result = build_fsign(&ctx->lc, src[0]);
      break;
   case nir_op_ffloor:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.floor", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_ftrunc:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.trunc", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fceil:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.ceil", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fround_even:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.rint", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_ffract:
      result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.fract",
                                           to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fsin:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.sin", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fcos:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.cos", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fsqrt:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.sqrt", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fexp2:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.exp2", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_flog2:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.log2", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_frsq:
      result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.rsq",
                                           to_float_type(&ctx->lc, def_type), src[0]);
      // TODO: abi not enabled
      // if (ctx->abi->clamp_div_by_zero)
      //    result = build_fmin(&ctx->lc, result,
      //                        LLVMConstReal(to_float_type(&ctx->lc, def_type), FLT_MAX));
      break;
   case nir_op_frexp_exp:
      /* NOTE(review): unimplemented — result stays NULL and no SSA def is
       * stored for this instruction. */
      // TODO: enable this when ac_build_frexp_exp() is added
      // src[0] = to_float(&ctx->lc, src[0]);
      // result = ac_build_frexp_exp(&ctx->lc, src[0], get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])));
      // if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) == 16)
      //    result = LLVMBuildSExt(ctx->lc.builder, result, ctx->lc.i32, "");
      break;
   case nir_op_frexp_sig:
      /* NOTE(review): unimplemented, same caveat as frexp_exp.  Also note
       * the nir_op_fpow case below is commented out, so fpow currently
       * falls into the default abort(). */
      // TODO: enable this when ac_build_frexp_mant() is added
      // src[0] = to_float(&ctx->lc, src[0]);
      // result = ac_build_frexp_mant(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
      // break;
      // case nir_op_fpow:
      //    result = emit_intrin_2f_param(&ctx->lc, "llvm.pow", to_float_type(&ctx->lc, def_type),
      //                                  src[0], src[1]);
      break;
   case nir_op_fmax:
      result = emit_intrin_2f_param(&ctx->lc, "llvm.maxnum", to_float_type(&ctx->lc, def_type),
                                    src[0], src[1]);
      break;
   case nir_op_fmin:
      result = emit_intrin_2f_param(&ctx->lc, "llvm.minnum", to_float_type(&ctx->lc, def_type),
                                    src[0], src[1]);
      break;
   case nir_op_ffma:
      result =
         emit_intrin_3f_param(&ctx->lc, "llvm.fmuladd",
                              to_float_type(&ctx->lc, def_type), src[0], src[1], src[2]);
      break;
   case nir_op_ldexp:
      src[0] = to_float(&ctx->lc, src[0]);
      if (get_elem_bits(&ctx->lc, def_type) == 32)
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f32", ctx->lc.f32, src, 2,
                                  FUNC_ATTR_READNONE);
      else if (get_elem_bits(&ctx->lc, def_type) == 16)
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f16", ctx->lc.f16, src, 2,
                                  FUNC_ATTR_READNONE);
      else
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f64", ctx->lc.f64, src, 2,
                                  FUNC_ATTR_READNONE);
      break;
   case nir_op_bfm:
      result = emit_bfm(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_bitfield_select:
      result = emit_bitfield_select(&ctx->lc, src[0], src[1], src[2]);
      break;
   case nir_op_ubfe:
      result = build_bfe(&ctx->lc, src[0], src[1], src[2], false);
      break;
   case nir_op_ibfe:
      result = build_bfe(&ctx->lc, src[0], src[1], src[2], true);
      break;
   case nir_op_bitfield_reverse:
      result = build_bitfield_reverse(&ctx->lc, src[0]);
      break;
   case nir_op_bit_count:
      result = build_bit_count(&ctx->lc, src[0]);
      break;
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      /* Build the destination vector out of the scalar sources. */
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
         src[i] = to_integer(&ctx->lc, src[i]);
      result = build_gather_values(&ctx->lc, src, num_components);
      break;
   case nir_op_f2i8:
   case nir_op_f2i16:
   case nir_op_f2i32:
   case nir_op_f2i64:
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFPToSI(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_f2u8:
   case nir_op_f2u16:
   case nir_op_f2u32:
   case nir_op_f2u64:
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFPToUI(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_i2f16:
   case nir_op_i2f32:
   case nir_op_i2f64:
      result = LLVMBuildSIToFP(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      break;
   case nir_op_u2f16:
   case nir_op_u2f32:
   case nir_op_u2f64:
      result = LLVMBuildUIToFP(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      break;
   case nir_op_f2f16_rtz:
   case nir_op_f2f16:
   case nir_op_f2fmp:
      src[0] = to_float(&ctx->lc, src[0]);

      /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
       * all f32->f16 conversions have to round towards zero, because both scalar
       * and vec2 down-conversions have to round equally.
       */
      if (ctx->lc.float_mode == FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
         src[0] = to_float(&ctx->lc, src[0]);

         if (LLVMTypeOf(src[0]) == ctx->lc.f64)
            src[0] = LLVMBuildFPTrunc(ctx->lc.builder, src[0], ctx->lc.f32, "");

         /* Fast path conversion. This only works if NIR is vectorized
          * to vec2 16.
          */
         if (LLVMTypeOf(src[0]) == ctx->lc.v2f32) {
            LLVMValueRef args[] = {
               llvm_extract_elem(&ctx->lc, src[0], 0),
               llvm_extract_elem(&ctx->lc, src[0], 1),
            };
            result = build_cvt_pkrtz_f16(&ctx->lc, args);
            break;
         }

         assert(get_llvm_num_components(src[0]) == 1);
         LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->lc.f32)};
         result = build_cvt_pkrtz_f16(&ctx->lc, param);
         result = LLVMBuildExtractElement(ctx->lc.builder, result, ctx->lc.i32_0, "");
      } else {
         if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
            result =
               LLVMBuildFPExt(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
         else
            result =
               LLVMBuildFPTrunc(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      }
      break;
   case nir_op_f2f16_rtne:
   case nir_op_f2f32:
   case nir_op_f2f64:
      src[0] = to_float(&ctx->lc, src[0]);
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildFPExt(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      else
         result =
            LLVMBuildFPTrunc(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      break;
   case nir_op_u2u8:
   case nir_op_u2u16:
   case nir_op_u2ump:
   case nir_op_u2u32:
   case nir_op_u2u64:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildZExt(ctx->lc.builder, src[0], def_type, "");
      else
         result = LLVMBuildTrunc(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_i2i8:
   case nir_op_i2i16:
   case nir_op_i2imp:
   case nir_op_i2i32:
   case nir_op_i2i64:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildSExt(ctx->lc.builder, src[0], def_type, "");
      else
         result = LLVMBuildTrunc(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_b32csel:
      result = emit_bcsel(&ctx->lc, src[0], src[1], src[2]);
      break;
   case nir_op_find_lsb:
      result = find_lsb(&ctx->lc, ctx->lc.i32, src[0]);
      break;
   case nir_op_ufind_msb:
      result = build_umsb(&ctx->lc, src[0], ctx->lc.i32);
      break;
   case nir_op_ifind_msb:
      result = build_imsb(&ctx->lc, src[0], ctx->lc.i32);
      break;
   case nir_op_uadd_carry:
      result = emit_uint_carry(&ctx->lc, "llvm.uadd.with.overflow.i32", src[0], src[1]);
      break;
   case nir_op_usub_borrow:
      result = emit_uint_carry(&ctx->lc, "llvm.usub.with.overflow.i32", src[0], src[1]);
      break;
   case nir_op_b2f16:
   case nir_op_b2f32:
   case nir_op_b2f64:
      result = emit_b2f(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
      break;
   case nir_op_f2b32:
      result = emit_f2b(&ctx->lc, src[0]);
      break;
   case nir_op_b2i8:
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      result = emit_b2i(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
      break;
   case nir_op_i2b32:
      result = emit_i2b(&ctx->lc, src[0]);
      break;
   case nir_op_fquantize2f16:
      result = emit_f2f16(&ctx->lc, src[0]);
      break;
   case nir_op_umul_high:
      result = emit_umul_high(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_imul_high:
      result = emit_imul_high(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_pack_half_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pkrtz_f16);
      break;
   case nir_op_pack_snorm_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pknorm_i16);
      break;
   case nir_op_pack_unorm_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pknorm_u16);
      break;
   case nir_op_unpack_half_2x16:
      result = emit_unpack_half_2x16(&ctx->lc, src[0]);
      break;
   case nir_op_fddx:
   case nir_op_fddy:
   case nir_op_fddx_fine:
   case nir_op_fddy_fine:
   case nir_op_fddx_coarse:
   case nir_op_fddy_coarse:
      /* NOTE(review): derivatives unimplemented — result stays NULL. */
      // TODO: enable this when emit_ddxy() is added
      //result = emit_ddxy(ctx, instr->op, src[0]);
      break;

   case nir_op_unpack_64_2x32_split_x: {
      assert(get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i32, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_0, "");
      break;
   }

   case nir_op_unpack_64_2x32_split_y: {
      assert(get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i32, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_1, "");
      break;
   }

   case nir_op_pack_64_2x32_split: {
      LLVMValueRef tmp = build_gather_values(&ctx->lc, src, 2);
      result = LLVMBuildBitCast(ctx->lc.builder, tmp, ctx->lc.i64, "");
      break;
   }

   case nir_op_pack_32_2x16_split: {
      LLVMValueRef tmp = build_gather_values(&ctx->lc, src, 2);
      result = LLVMBuildBitCast(ctx->lc.builder, tmp, ctx->lc.i32, "");
      break;
   }

   case nir_op_unpack_32_2x16_split_x: {
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i16, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_0, "");
      break;
   }

   case nir_op_unpack_32_2x16_split_y: {
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i16, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_1, "");
      break;
   }

   case nir_op_cube_face_coord: {
      src[0] = to_float(&ctx->lc, src[0]);
      LLVMValueRef results[2];
      LLVMValueRef in[3];
      for (unsigned chan = 0; chan < 3; chan++)
         in[chan] = llvm_extract_elem(&ctx->lc, src[0], chan);
      results[0] = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubesc", ctx->lc.f32, in, 3,
                                   FUNC_ATTR_READNONE);
      results[1] = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubetc", ctx->lc.f32, in, 3,
                                   FUNC_ATTR_READNONE);
      LLVMValueRef ma = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubema", ctx->lc.f32, in, 3,
                                        FUNC_ATTR_READNONE);
      results[0] = build_fdiv(&ctx->lc, results[0], ma);
      results[1] = build_fdiv(&ctx->lc, results[1], ma);
      LLVMValueRef offset = LLVMConstReal(ctx->lc.f32, 0.5);
      results[0] = LLVMBuildFAdd(ctx->lc.builder, results[0], offset, "");
      results[1] = LLVMBuildFAdd(ctx->lc.builder, results[1], offset, "");
      result = build_gather_values(&ctx->lc, results, 2);
      break;
   }

   case nir_op_cube_face_index: {
      src[0] = to_float(&ctx->lc, src[0]);
      LLVMValueRef in[3];
      for (unsigned chan = 0; chan < 3; chan++)
         in[chan] = llvm_extract_elem(&ctx->lc, src[0], chan);
      result = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubeid", ctx->lc.f32, in, 3,
                               FUNC_ATTR_READNONE);
      break;
   }

   default:
      fprintf(stderr, "Unknown NIR alu instr: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      abort();
   }

   if (result) {
      assert(instr->dest.dest.is_ssa);
      /* Values are stored canonically as integers/pointers; users bitcast
       * back to float as needed. */
      result = to_integer_or_pointer(&ctx->lc, result);
      ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
   }
}

/* Translate nir_intrinsic_load_deref: load a variable (shader input/output,
 * function temporary, or global memory) and return the loaded value bitcast
 * to the destination SSA type.
 *
 * NOTE(review): the nir_var_shader_in and nir_var_shader_out paths are
 * entirely commented out, so for those modes values[] is gathered
 * *uninitialized* below — a likely cause of the broken LLVM IR mentioned in
 * the commit message.  NOTE(review): const_index/indir_index are only set
 * when var != NULL but are read unconditionally in the function_temp path. */
static LLVMValueRef visit_load_var(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   LLVMValueRef values[8];
   int idx = 0;
   int ve = instr->dest.ssa.num_components;
   unsigned comp = 0;
   LLVMValueRef indir_index;
   LLVMValueRef ret;
   unsigned const_index;
   unsigned stride = 4;
   int mode = deref->mode;

   if (var) {
      bool vs_in = ctx->stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in;
      idx = var->data.driver_location;
      comp = var->data.location_frac;
      mode = var->data.mode;

      get_deref_offset(ctx, deref, vs_in, NULL, NULL, &const_index, &indir_index);

      if (var->data.compact) {
         /* Compact (e.g. clip-distance style) arrays are packed per
          * component rather than per vec4 slot. */
         stride = 1;
         const_index += comp;
         comp = 0;
      }
   }

   /* 64-bit values occupy two 32-bit channels in the in/out arrays. */
   if (instr->dest.ssa.bit_size == 64 &&
       (deref->mode == nir_var_shader_in || deref->mode == nir_var_shader_out ||
        deref->mode == nir_var_function_temp))
      ve *= 2;

   switch (mode) {
   case nir_var_shader_in:
      /* TODO: remove this after RADV switches to lowered IO */
      // if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) {
      //    return load_tess_varyings(ctx, instr, true);
      // }

      // if (ctx->stage == MESA_SHADER_GEOMETRY) {
      //    LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, instr->dest.ssa.bit_size);
      //    LLVMValueRef indir_index;
      //    unsigned const_index, vertex_index;
      //    get_deref_offset(ctx, deref, false, &vertex_index, NULL, &const_index, &indir_index);
      //    assert(indir_index == NULL);

      //    return ctx->abi->load_inputs(ctx->abi, var->data.location, var->data.driver_location,
      //                                 var->data.location_frac, instr->num_components, vertex_index,
      //                                 const_index, type);
      // }

      // for (unsigned chan = comp; chan < ve + comp; chan++) {
      //    if (indir_index) {
      //       unsigned count =
      //          glsl_count_attribute_slots(var->type, ctx->stage == MESA_SHADER_VERTEX);
      //       count -= chan / 4;
      //       LLVMValueRef tmp_vec = build_gather_values_extended(
      //          &ctx->lc, ctx->abi->inputs + idx + chan, count, stride, false, true);

      //       values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
      //    } else
      //       values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
      // }
      break;
   case nir_var_function_temp:
      for (unsigned chan = 0; chan < ve; chan++) {
         if (indir_index) {
            /* Indirect indexing: gather the whole local array and extract
             * the dynamically-selected element. */
            unsigned count = glsl_count_attribute_slots(var->type, false);
            count -= chan / 4;
            LLVMValueRef tmp_vec = build_gather_values_extended(
               &ctx->lc, ctx->locals + idx + chan, count, stride, true, true);

            values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
         } else {
            values[chan] =
               LLVMBuildLoad(ctx->lc.builder, ctx->locals[idx + chan + const_index * stride], "");
         }
      }
      break;
   case nir_var_shader_out:
      /* TODO: remove this after RADV switches to lowered IO */
      // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
      //    return load_tess_varyings(ctx, instr, false);
      // }

      // if (ctx->stage == MESA_SHADER_FRAGMENT && var->data.fb_fetch_output && ctx->abi->emit_fbfetch)
      //    return ctx->abi->emit_fbfetch(ctx->abi);

      // for (unsigned chan = comp; chan < ve + comp; chan++) {
      //    if (indir_index) {
      //       unsigned count = glsl_count_attribute_slots(var->type, false);
      //       count -= chan / 4;
      //       LLVMValueRef tmp_vec = build_gather_values_extended(
      //          &ctx->lc, ctx->abi->outputs + idx + chan, count, stride, true, true);

      //       values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
      //    } else {
      //       values[chan] = LLVMBuildLoad(ctx->lc.builder,
      //                                    ctx->abi->outputs[idx + chan + const_index * stride], "");
      //    }
      // }
      break;
   case nir_var_mem_global: {
      LLVMValueRef address = get_src(ctx, instr->src[0]);
      LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
      unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
      unsigned natural_stride = type_scalar_size_bytes(deref->type);
      /* NOTE(review): this inner 'stride' shadows the outer one above. */
      unsigned stride = explicit_stride ? explicit_stride : natural_stride;
      /* NOTE(review): elem_size_bytes is computed but never used. */
      int elem_size_bytes = get_elem_bits(&ctx->lc, result_type) / 8;
      bool split_loads = false;

      if (stride != natural_stride || split_loads) {
         /* Strided layout: load each component separately through a
          * scalar pointer. */
         if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
            result_type = LLVMGetElementType(result_type);

         LLVMTypeRef ptr_type =
            LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
         address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");

         for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
            LLVMValueRef offset = LLVMConstInt(ctx->lc.i32, i * stride / natural_stride, 0);
            values[i] =
               LLVMBuildLoad(ctx->lc.builder, build_gep_ptr(&ctx->lc, address, offset), "");

            if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
               LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic);
         }
         return build_gather_values(&ctx->lc, values, instr->dest.ssa.num_components);
      } else {
         /* Natural layout: one vector load. */
         LLVMTypeRef ptr_type =
            LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
         address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
         LLVMValueRef val = LLVMBuildLoad(ctx->lc.builder, address, "");

         if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
            LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
         return val;
      }
   }
   default:
      unreachable("unhandle variable mode");
   }
   ret = build_varying_gather_values(&ctx->lc, values, ve, comp);
   return LLVMBuildBitCast(ctx->lc.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
}

/* Translate nir_intrinsic_store_deref: store a value to a shader output,
 * function temporary, or global memory.
 *
 * NOTE(review): the shader_out and function_temp store bodies are commented
 * out below, so those writes are currently dropped on the floor. */
static void visit_store_var(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
{
   // if (ctx->lc.postponed_kill) {
   //    LLVMValueRef cond = LLVMBuildLoad(ctx->lc.builder, ctx->lc.postponed_kill, "");
   //    ac_build_ifcc(&ctx->lc, cond, 7002);
   // }

   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* NOTE(review): temp_ptr is unused — all its uses are commented out. */
   LLVMValueRef temp_ptr, value;
   int idx = 0;
   unsigned comp = 0;
   LLVMValueRef src = to_float(&ctx->lc, get_src(ctx, instr->src[1]));
   int writemask = instr->const_index[0];
   LLVMValueRef indir_index;
   unsigned const_index;

   if (var) {
      get_deref_offset(ctx, deref, false, NULL, NULL, &const_index, &indir_index);
      idx = var->data.driver_location;
      comp = var->data.location_frac;

      if (var->data.compact) {
         /* Compact arrays are packed per component, not per vec4 slot. */
         const_index += comp;
         comp = 0;
      }
   }

   /* 64-bit stores are split into pairs of 32-bit channels; the writemask
    * is widened accordingly. */
   if (get_elem_bits(&ctx->lc, LLVMTypeOf(src)) == 64 &&
       (deref->mode == nir_var_shader_out || deref->mode == nir_var_function_temp)) {

      src = LLVMBuildBitCast(ctx->lc.builder, src,
                             LLVMVectorType(ctx->lc.f32, get_llvm_num_components(src) * 2), "");

      writemask = widen_mask(writemask, 2);
   }

   writemask = writemask << comp;

   switch (deref->mode) {
   case nir_var_shader_out:
      /* TODO: remove this after RADV switches to lowered IO */
      // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
      //    LLVMValueRef vertex_index = NULL;
      //    LLVMValueRef indir_index = NULL;
      //    unsigned const_index = 0;
      //    const bool is_patch = var->data.patch ||
      //                          var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
      //                          var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;

      //    get_deref_offset(ctx, deref, false, NULL, is_patch ? NULL : &vertex_index, &const_index,
      //                     &indir_index);

      //    ctx->abi->store_tcs_outputs(ctx->abi, var, vertex_index, indir_index, const_index, src,
      //                                writemask, var->data.location_frac, var->data.driver_location);
      //    break;
      // }

      /* NOTE(review): the per-channel value is extracted but the actual
       * store below is commented out — output writes are lost. */
      for (unsigned chan = 0; chan < 8; chan++) {
         int stride = 4;
         if (!(writemask & (1 << chan)))
            continue;

         value = llvm_extract_elem(&ctx->lc, src, chan - comp);

         // if (var->data.compact)
         //    stride = 1;
         // if (indir_index) {
         //    unsigned count = glsl_count_attribute_slots(var->type, false);
         //    count -= chan / 4;
         //    LLVMValueRef tmp_vec = build_gather_values_extended(
         //       &ctx->lc, ctx->abi->outputs + idx + chan, count, stride, true, true);

         //    tmp_vec = LLVMBuildInsertElement(ctx->lc.builder, tmp_vec, value, indir_index, "");
         //    build_store_values_extended(&ctx->lc, ctx->abi->outputs + idx + chan, count, stride,
         //                                tmp_vec);

         // } else {
         //    temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride];

         //    LLVMBuildStore(ctx->lc.builder, value, temp_ptr);
         // }
      }
      break;
   case nir_var_function_temp:
      /* NOTE(review): same situation — the store into ctx->locals is
       * commented out, so local writes are lost. */
      for (unsigned chan = 0; chan < 8; chan++) {
         if (!(writemask & (1 << chan)))
            continue;

         value = llvm_extract_elem(&ctx->lc, src, chan);
         // if (indir_index) {
         //    unsigned count = glsl_count_attribute_slots(var->type, false);
         //    count -= chan / 4;
         //    LLVMValueRef tmp_vec = build_gather_values_extended(
         //       &ctx->lc, ctx->locals + idx + chan, count, 4, true, true);

         //    tmp_vec = LLVMBuildInsertElement(ctx->lc.builder, tmp_vec, value, indir_index, "");
         //    build_store_values_extended(&ctx->lc, ctx->locals + idx + chan, count, 4, tmp_vec);
         // } else {
         //    temp_ptr = ctx->locals[idx + chan + const_index * 4];

         //    LLVMBuildStore(ctx->lc.builder, value, temp_ptr);
         // }
      }
      break;

   case nir_var_mem_global: {
      /* NOTE(review): this re-declared writemask shadows the outer one and
       * ignores the 64-bit widening / comp shift applied above — confirm
       * whether that is intentional for the global-memory path. */
      int writemask = instr->const_index[0];
      LLVMValueRef address = get_src(ctx, instr->src[0]);
      /* Raw (non-float-converted) value is used for global stores. */
      LLVMValueRef val = get_src(ctx, instr->src[1]);

      unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
      unsigned natural_stride = type_scalar_size_bytes(deref->type);
      unsigned stride = explicit_stride ? explicit_stride : natural_stride;
      /* NOTE(review): elem_size_bytes is computed but never used. */
      int elem_size_bytes = get_elem_bits(&ctx->lc, LLVMTypeOf(val)) / 8;
      bool split_stores = false;

      LLVMTypeRef ptr_type =
         LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
      address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");

      if (writemask == (1u << get_llvm_num_components(val)) - 1 && stride == natural_stride &&
          !split_stores) {
         /* Full-width store with natural layout: one vector store.
          * NOTE(review): this bitcast of 'address' repeats the one just
          * above — redundant but harmless. */
         LLVMTypeRef ptr_type =
            LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
         address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");

         val = LLVMBuildBitCast(ctx->lc.builder, val, LLVMGetElementType(LLVMTypeOf(address)), "");
         LLVMValueRef store = LLVMBuildStore(ctx->lc.builder, val, address);

         if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
            LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
      } else {
         /* Partial or strided store: write each enabled channel through a
          * scalar pointer. */
         LLVMTypeRef val_type = LLVMTypeOf(val);
         if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
            val_type = LLVMGetElementType(val_type);

         LLVMTypeRef ptr_type =
            LLVMPointerType(val_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
         address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
         for (unsigned chan = 0; chan < 4; chan++) {
            if (!(writemask & (1 << chan)))
               continue;

            LLVMValueRef offset = LLVMConstInt(ctx->lc.i32, chan * stride / natural_stride, 0);

            LLVMValueRef ptr = build_gep_ptr(&ctx->lc, address, offset);
            LLVMValueRef src = llvm_extract_elem(&ctx->lc, val, chan);
            src = LLVMBuildBitCast(ctx->lc.builder, src, LLVMGetElementType(LLVMTypeOf(ptr)), "");
            LLVMValueRef store = LLVMBuildStore(ctx->lc.builder, src, ptr);

            if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
               LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
         }
      }
break; + } + default: + abort(); + break; + } + + // if (ctx->ac.postponed_kill) + // ac_build_endif(&ctx->ac, 7002); +} + +static void visit_intrinsic(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr) +{ + LLVMValueRef result = NULL; + + switch (instr->intrinsic) { + case nir_intrinsic_ballot: + // result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0])); + // if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size) + // result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, ""); + break; + case nir_intrinsic_read_invocation: + // result = + // ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1])); + break; + case nir_intrinsic_read_first_invocation: + // result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL); + break; + case nir_intrinsic_load_subgroup_invocation: + // result = ac_get_thread_id(&ctx->ac); + break; + case nir_intrinsic_load_work_group_id: { + // LLVMValueRef values[3]; + + // for (int i = 0; i < 3; i++) { + // values[i] = ctx->args->workgroup_ids[i].used + // ? 
ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i]) + // : ctx->ac.i32_0; + // } + + // result = ac_build_gather_values(&ctx->ac, values, 3); + break; + } + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_first_vertex: + //result = ctx->abi->load_base_vertex(ctx->abi); + result = LLVMGetParam(ctx->main_function, ctx->args.base_vertex.arg_index); + break; + case nir_intrinsic_load_local_group_size: + // result = ctx->abi->load_local_group_size(ctx->abi); + break; + case nir_intrinsic_load_vertex_id: + result = LLVMBuildAdd(ctx->lc.builder, LLVMGetParam(ctx->main_function, ctx->args.vertex_id.arg_index), + LLVMGetParam(ctx->main_function, ctx->args.base_vertex.arg_index), ""); + break; + case nir_intrinsic_load_vertex_id_zero_base: { + // result = ctx->abi->vertex_id; + result = LLVMGetParam(ctx->main_function, ctx->args.vertex_id.arg_index); + break; + } + case nir_intrinsic_load_local_invocation_id: { + // result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids); + break; + } + case nir_intrinsic_load_base_instance: + // result = ac_get_arg(&ctx->ac, ctx->args->start_instance); + break; + case nir_intrinsic_load_draw_id: + // result = ac_get_arg(&ctx->ac, ctx->args->draw_id); + break; + case nir_intrinsic_load_view_index: + // result = ac_get_arg(&ctx->ac, ctx->args->view_index); + break; + case nir_intrinsic_load_invocation_id: + // if (ctx->stage == MESA_SHADER_TESS_CTRL) { + // result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5); + // } else { + // if (ctx->ac.chip_class >= GFX10) { + // result = + // LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id), + // LLVMConstInt(ctx->ac.i32, 127, 0), ""); + // } else { + // result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id); + // } + // } + break; + case nir_intrinsic_load_primitive_id: + // if (ctx->stage == MESA_SHADER_GEOMETRY) { + // result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id); + // } else if (ctx->stage == 
MESA_SHADER_TESS_CTRL) { + // result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id); + // } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { + // result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id); + // } else + // fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage); + // break; + // case nir_intrinsic_load_sample_id: + // result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4); + break; + case nir_intrinsic_load_sample_pos: + // result = load_sample_pos(ctx); + break; + case nir_intrinsic_load_sample_mask_in: + // result = ctx->abi->load_sample_mask_in(ctx->abi); + break; + case nir_intrinsic_load_frag_coord: { + // LLVMValueRef values[4] = { + // ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]), + // ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]), + // ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))}; + // result = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4)); + break; + } + case nir_intrinsic_load_layer_id: + // result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; + break; + case nir_intrinsic_load_front_face: + // result = ac_get_arg(&ctx->ac, ctx->args->front_face); + break; + case nir_intrinsic_load_helper_invocation: + // result = ac_build_load_helper_invocation(&ctx->ac); + break; + case nir_intrinsic_is_helper_invocation: + // result = ac_build_is_helper_invocation(&ctx->ac); + break; + case nir_intrinsic_load_color0: + // result = ctx->abi->color0; + break; + case nir_intrinsic_load_color1: + // result = ctx->abi->color1; + break; + case nir_intrinsic_load_user_data_amd: + // assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32); + // result = ctx->abi->user_data; + break; + case nir_intrinsic_load_instance_id: + // result = ctx->abi->instance_id; + break; + case nir_intrinsic_load_num_work_groups: + // result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups); + break; + case 
nir_intrinsic_load_local_invocation_index: + // result = visit_load_local_invocation_index(ctx); + break; + case nir_intrinsic_load_subgroup_id: + // result = visit_load_subgroup_id(ctx); + break; + case nir_intrinsic_load_num_subgroups: + // result = visit_load_num_subgroups(ctx); + break; + case nir_intrinsic_first_invocation: + // result = visit_first_invocation(ctx); + break; + case nir_intrinsic_load_push_constant: + // result = visit_load_push_constant(ctx, instr); + break; + case nir_intrinsic_vulkan_resource_index: { + // LLVMValueRef index = get_src(ctx, instr->src[0]); + // unsigned desc_set = nir_intrinsic_desc_set(instr); + // unsigned binding = nir_intrinsic_binding(instr); + + // result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding); + break; + } + case nir_intrinsic_vulkan_resource_reindex: + // result = visit_vulkan_resource_reindex(ctx, instr); + break; + case nir_intrinsic_store_ssbo: + // visit_store_ssbo(ctx, instr); + break; + case nir_intrinsic_load_ssbo: + // result = visit_load_buffer(ctx, instr); + break; + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + // result = visit_atomic_ssbo(ctx, instr); + break; + case nir_intrinsic_load_ubo: + // result = visit_load_ubo_buffer(ctx, instr); + break; + case nir_intrinsic_get_buffer_size: + // result = visit_get_buffer_size(ctx, instr); + break; + case nir_intrinsic_load_deref: + result = visit_load_var(ctx, instr); + break; + case nir_intrinsic_store_deref: + visit_store_var(ctx, instr); + break; + case nir_intrinsic_load_input: + case nir_intrinsic_load_input_vertex: + case nir_intrinsic_load_per_vertex_input: + // result = visit_load(ctx, 
instr, false); + break; + case nir_intrinsic_load_output: + case nir_intrinsic_load_per_vertex_output: + // result = visit_load(ctx, instr, true); + break; + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: + // visit_store_output(ctx, instr); + break; + case nir_intrinsic_load_shared: + // result = visit_load_shared(ctx, instr); + break; + case nir_intrinsic_store_shared: + // visit_store_shared(ctx, instr); + break; + case nir_intrinsic_bindless_image_samples: + case nir_intrinsic_image_deref_samples: + // result = visit_image_samples(ctx, instr); + break; + case nir_intrinsic_bindless_image_load: + // result = visit_image_load(ctx, instr, true); + break; + case nir_intrinsic_image_deref_load: + // result = visit_image_load(ctx, instr, false); + break; + case nir_intrinsic_bindless_image_store: + // visit_image_store(ctx, instr, true); + break; + case nir_intrinsic_image_deref_store: + // visit_image_store(ctx, instr, false); + break; + case nir_intrinsic_bindless_image_atomic_add: + case nir_intrinsic_bindless_image_atomic_imin: + case nir_intrinsic_bindless_image_atomic_umin: + case nir_intrinsic_bindless_image_atomic_imax: + case nir_intrinsic_bindless_image_atomic_umax: + case nir_intrinsic_bindless_image_atomic_and: + case nir_intrinsic_bindless_image_atomic_or: + case nir_intrinsic_bindless_image_atomic_xor: + case nir_intrinsic_bindless_image_atomic_exchange: + case nir_intrinsic_bindless_image_atomic_comp_swap: + case nir_intrinsic_bindless_image_atomic_inc_wrap: + case nir_intrinsic_bindless_image_atomic_dec_wrap: + // result = visit_image_atomic(ctx, instr, true); + break; + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_imin: + case nir_intrinsic_image_deref_atomic_umin: + case nir_intrinsic_image_deref_atomic_imax: + case nir_intrinsic_image_deref_atomic_umax: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case 
nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_atomic_inc_wrap: + case nir_intrinsic_image_deref_atomic_dec_wrap: + // result = visit_image_atomic(ctx, instr, false); + break; + case nir_intrinsic_bindless_image_size: + // result = visit_image_size(ctx, instr, true); + break; + case nir_intrinsic_image_deref_size: + // result = visit_image_size(ctx, instr, false); + break; + case nir_intrinsic_shader_clock: + // result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr)); + break; + case nir_intrinsic_discard: + case nir_intrinsic_discard_if: + // emit_discard(ctx, instr); + break; + case nir_intrinsic_demote: + case nir_intrinsic_demote_if: + // emit_demote(ctx, instr); + break; + case nir_intrinsic_memory_barrier: + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: + // emit_membar(&ctx->ac, instr); + break; + case nir_intrinsic_scoped_barrier: { + // assert(!(nir_intrinsic_memory_semantics(instr) & + // (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE))); + + // nir_variable_mode modes = nir_intrinsic_memory_modes(instr); + + // unsigned wait_flags = 0; + // if (modes & (nir_var_mem_global | nir_var_mem_ssbo)) + // wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE; + // if (modes & nir_var_mem_shared) + // wait_flags |= AC_WAIT_LGKM; + + // if (wait_flags) + // ac_build_waitcnt(&ctx->ac, wait_flags); + + // if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP) + // ac_emit_barrier(&ctx->ac, ctx->stage); + break; + } + case nir_intrinsic_memory_barrier_tcs_patch: + break; + case nir_intrinsic_control_barrier: + // ac_emit_barrier(&ctx->ac, ctx->stage); + break; + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case 
nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_shared_atomic_fadd: { + // LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size); + // result = visit_var_atomic(ctx, instr, ptr, 1); + break; + } + case nir_intrinsic_deref_atomic_add: + case nir_intrinsic_deref_atomic_imin: + case nir_intrinsic_deref_atomic_umin: + case nir_intrinsic_deref_atomic_imax: + case nir_intrinsic_deref_atomic_umax: + case nir_intrinsic_deref_atomic_and: + case nir_intrinsic_deref_atomic_or: + case nir_intrinsic_deref_atomic_xor: + case nir_intrinsic_deref_atomic_exchange: + case nir_intrinsic_deref_atomic_comp_swap: + case nir_intrinsic_deref_atomic_fadd: { + // LLVMValueRef ptr = get_src(ctx, instr->src[0]); + // result = visit_var_atomic(ctx, instr, ptr, 1); + break; + } + case nir_intrinsic_load_barycentric_pixel: + // result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr)); + break; + case nir_intrinsic_load_barycentric_centroid: + // result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr)); + break; + case nir_intrinsic_load_barycentric_sample: + // result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr)); + break; + case nir_intrinsic_load_barycentric_model: + // result = barycentric_model(ctx); + break; + case nir_intrinsic_load_barycentric_at_offset: { + // LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); + // result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset); + break; + } + case nir_intrinsic_load_barycentric_at_sample: { + // LLVMValueRef sample_id = get_src(ctx, instr->src[0]); + // result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id); + break; + } + case 
nir_intrinsic_load_interpolated_input: { + /* We assume any indirect loads have been lowered away */ + // ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]); + // assert(offset); + // assert(offset[0].i32 == 0); + + // LLVMValueRef interp_param = get_src(ctx, instr->src[0]); + // unsigned index = nir_intrinsic_base(instr); + // unsigned component = nir_intrinsic_component(instr); + // result = load_interpolated_input(ctx, interp_param, index, component, + // instr->dest.ssa.num_components, instr->dest.ssa.bit_size); + break; + } + case nir_intrinsic_emit_vertex: + // ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs); + break; + case nir_intrinsic_emit_vertex_with_counter: { + // unsigned stream = nir_intrinsic_stream_id(instr); + // LLVMValueRef next_vertex = get_src(ctx, instr->src[0]); + // ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs); + break; + } + case nir_intrinsic_end_primitive: + case nir_intrinsic_end_primitive_with_counter: + // ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr)); + break; + case nir_intrinsic_load_tess_coord: + // result = ctx->abi->load_tess_coord(ctx->abi); + break; + case nir_intrinsic_load_tess_level_outer: + // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false); + break; + case nir_intrinsic_load_tess_level_inner: + // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false); + break; + case nir_intrinsic_load_tess_level_outer_default: + // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true); + break; + case nir_intrinsic_load_tess_level_inner_default: + // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true); + break; + case nir_intrinsic_load_patch_vertices_in: + // result = ctx->abi->load_patch_vertices_in(ctx->abi); + break; + case nir_intrinsic_vote_all: { + // LLVMValueRef tmp = 
ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0])); + // result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); + break; + } + case nir_intrinsic_vote_any: { + // LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0])); + // result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); + break; + } + case nir_intrinsic_shuffle: + // if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 || + // (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) { + // result = + // ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1])); + // } else { + // LLVMValueRef src = get_src(ctx, instr->src[0]); + // LLVMValueRef index = get_src(ctx, instr->src[1]); + // LLVMTypeRef type = LLVMTypeOf(src); + // struct waterfall_context wctx; + // LLVMValueRef index_val; + + // index_val = enter_waterfall(ctx, &wctx, index, true); + + // src = LLVMBuildZExt(ctx->ac.builder, src, ctx->ac.i32, ""); + + // result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32, + // (LLVMValueRef[]){src, index_val}, 2, + // AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); + + // result = LLVMBuildTrunc(ctx->ac.builder, result, type, ""); + + // result = exit_waterfall(ctx, &wctx, result); + // } + break; + case nir_intrinsic_reduce: + // result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0], + // instr->const_index[1]); + break; + case nir_intrinsic_inclusive_scan: + // result = + // ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]); + break; + case nir_intrinsic_exclusive_scan: + // result = + // ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]); + break; + case nir_intrinsic_quad_broadcast: { + // unsigned lane = nir_src_as_uint(instr->src[1]); + // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane); + break; + } + case nir_intrinsic_quad_swap_horizontal: + // result = 
ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2); + break; + case nir_intrinsic_quad_swap_vertical: + // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1); + break; + case nir_intrinsic_quad_swap_diagonal: + // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0); + break; + case nir_intrinsic_quad_swizzle_amd: { + // uint32_t mask = nir_intrinsic_swizzle_mask(instr); + // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3, + // (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3); + break; + } + case nir_intrinsic_masked_swizzle_amd: { + // uint32_t mask = nir_intrinsic_swizzle_mask(instr); + // result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask); + break; + } + case nir_intrinsic_write_invocation_amd: + // result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]), + // get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2])); + break; + case nir_intrinsic_mbcnt_amd: + // result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0])); + break; + case nir_intrinsic_load_scratch: { + // LLVMValueRef offset = get_src(ctx, instr->src[0]); + // LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset); + // LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); + // LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1 + // ? 
comp_type + // : LLVMVectorType(comp_type, instr->dest.ssa.num_components); + // unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); + // ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), ""); + // result = LLVMBuildLoad(ctx->ac.builder, ptr, ""); + break; + } + case nir_intrinsic_store_scratch: { + // LLVMValueRef offset = get_src(ctx, instr->src[1]); + // LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset); + // LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size); + // unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); + // ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(comp_type, addr_space), ""); + // LLVMValueRef src = get_src(ctx, instr->src[0]); + // unsigned wrmask = nir_intrinsic_write_mask(instr); + // while (wrmask) { + // int start, count; + // u_bit_scan_consecutive_range(&wrmask, &start, &count); + + // LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false); + // LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, ""); + // LLVMTypeRef vec_type = count == 1 ? comp_type : LLVMVectorType(comp_type, count); + // offset_ptr = LLVMBuildBitCast(ctx->ac.builder, offset_ptr, + // LLVMPointerType(vec_type, addr_space), ""); + // LLVMValueRef offset_src = ac_extract_components(&ctx->ac, src, start, count); + // LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr); + // } + break; + } + case nir_intrinsic_load_constant: { + // unsigned base = nir_intrinsic_base(instr); + // unsigned range = nir_intrinsic_range(instr); + + // LLVMValueRef offset = get_src(ctx, instr->src[0]); + // offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, base, false), ""); + + // /* Clamp the offset to avoid out-of-bound access because global + // * instructions can't handle them. 
+ // */ + // LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false); + // LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, ""); + // offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, ""); + + // LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data, offset); + // LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); + // LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1 + // ? comp_type + // : LLVMVectorType(comp_type, instr->dest.ssa.num_components); + // unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); + // ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), ""); + // result = LLVMBuildLoad(ctx->ac.builder, ptr, ""); + break; + } + default: + fprintf(stderr, "Unknown intrinsic: "); + nir_print_instr(&instr->instr, stderr); + fprintf(stderr, "\n"); + break; + } + if (result) { + ctx->ssa_defs[instr->dest.ssa.index] = result; + } +} + +static void visit_cf_list(struct libresoc_nir_tran_ctx *ctx, struct exec_list *list); + +static void visit_block(struct libresoc_nir_tran_ctx *ctx, nir_block *block) +{ + nir_foreach_instr (instr, block) { + switch (instr->type) { + case nir_instr_type_alu: + visit_alu(ctx, nir_instr_as_alu(instr)); + break; + case nir_instr_type_load_const: + visit_load_const(ctx, nir_instr_as_load_const(instr)); + break; + case nir_instr_type_intrinsic: + visit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_tex: + // visit_tex(ctx, nir_instr_as_tex(instr)); + break; + case nir_instr_type_phi: + visit_phi(ctx, nir_instr_as_phi(instr)); + break; + case nir_instr_type_ssa_undef: + visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); + break; + case nir_instr_type_jump: + visit_jump(&ctx->lc, nir_instr_as_jump(instr)); + break; + case nir_instr_type_deref: + visit_deref(ctx, nir_instr_as_deref(instr)); + break; + default: + fprintf(stderr, "Unknown NIR instr type: "); + 
nir_print_instr(instr, stderr); + fprintf(stderr, "\n"); + abort(); + } + } +} + +static void visit_if(struct libresoc_nir_tran_ctx *ctx, nir_if *if_stmt) +{ + +} + +static void visit_loop(struct libresoc_nir_tran_ctx *ctx, nir_loop *loop) +{ + +} + +static void visit_cf_list(struct libresoc_nir_tran_ctx *ctx, struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, node, node, list) + { + switch (node->type) { + case nir_cf_node_block: + visit_block(ctx, nir_cf_node_as_block(node)); + break; + + case nir_cf_node_if: + visit_if(ctx, nir_cf_node_as_if(node)); + break; + + case nir_cf_node_loop: + visit_loop(ctx, nir_cf_node_as_loop(node)); + break; + + default: + assert(0); + } + } +} + +LLVMModuleRef libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_shader *nir) +{ + struct libresoc_nir_tran_ctx ctx = {}; + struct nir_function *func; + char shader_name[60]; + sprintf(shader_name, "libresoc-shader-%s", gl_shader_stage_name(nir->info.stage)); + LLVMModuleRef mod = LLVMModuleCreateWithNameInContext(shader_name, llvm_ref->lc.context); + ctx.lc.module = &mod; + ctx.lc = llvm_ref->lc; + ctx.stage = nir->info.stage; + ctx.info = &nir->info; + + if (ctx.stage == MESA_SHADER_VERTEX) { + add_arg(&ctx.args, ARG_SGPR, 1, ARG_INT, &ctx.args.base_vertex); + add_arg(&ctx.args, ARG_SGPR, 1, ARG_INT, &ctx.args.start_instance); + add_arg(&ctx.args, ARG_VGPR, 1, ARG_INT, &ctx.args.vertex_id); + } + LLVMTypeRef arg_types[32]; + LLVMTypeRef ret_type = LLVMVoidTypeInContext(ctx.lc.context); + for (unsigned i = 0; i < ctx.args.arg_count; i++) { + arg_types[i] = arg_llvm_type(ctx.args.args[i].type, ctx.args.args[i].size, &ctx.lc); + } + + //TODO: this is zero argument function and returns void + LLVMTypeRef main_function_type = LLVMFunctionType(ret_type, arg_types, ctx.args.arg_count, 0); + + LLVMValueRef main_function = LLVMAddFunction(mod, "main_function", main_function_type); + LLVMBasicBlockRef main_function_body = + LLVMAppendBasicBlockInContext(ctx.lc.context, 
main_function, "main_body"); + LLVMPositionBuilderAtEnd(ctx.lc.builder, main_function_body); + ctx.main_function = main_function; + + ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + func = (struct nir_function *)exec_list_get_head(&nir->functions); + + nir_index_ssa_defs(func->impl); + ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef)); + setup_locals(&ctx, func); + setup_scratch(&ctx, nir); + setup_constant_data(&ctx, nir); + + // if (gl_shader_stage_is_compute(nir->info.stage)) + // setup_shared(&ctx, nir); + visit_cf_list(&ctx, &func->impl->body); char *error = NULL; - LLVMVerifyModule(mod, LLVMAbortProcessAction, &error); + LLVMVerifyModule(mod, LLVMPrintMessageAction, &error); LLVMDumpModule(mod); LLVMDisposeMessage(error); - LLVMOrcModuleHandle mod_handle; - LLVMErrorRef error_ref = LLVMOrcAddEagerlyCompiledIR(llvm_ref->orc_ref, - &mod_handle, - mod, - orc_sym_resolver, - (void *)(llvm_ref->orc_ref)); + return mod; + // LLVMModuleRef mod = LLVMModuleCreateWithName("libresoc_mod"); + // LLVMTypeRef param_types[] = { LLVMInt32Type(), LLVMInt32Type() }; + // LLVMTypeRef ret_type = LLVMFunctionType(LLVMInt32Type(), param_types, 2, 0); + // LLVMValueRef sum = LLVMAddFunction(mod, "sum", ret_type); + // LLVMBasicBlockRef entry = LLVMAppendBasicBlock(sum, "entry"); + // LLVMBuilderRef builder = LLVMCreateBuilder(); + // LLVMPositionBuilderAtEnd(builder, entry); + // LLVMValueRef tmp = LLVMBuildAdd(builder, LLVMGetParam(sum, 0), LLVMGetParam(sum, 1), "tmp"); + // LLVMBuildRet(builder, tmp); + // char *error = NULL; + // LLVMVerifyModule(mod, LLVMAbortProcessAction, &error); + // LLVMDumpModule(mod); + // LLVMDisposeMessage(error); + // LLVMOrcModuleHandle mod_handle; + // LLVMErrorRef error_ref = 
LLVMOrcAddEagerlyCompiledIR(llvm_ref->orc_ref, + // &mod_handle, + // mod, + // orc_sym_resolver, + // (void *)(llvm_ref->orc_ref)); } diff --git a/src/libre-soc/vulkan/libresoc_llvm.h b/src/libre-soc/vulkan/libresoc_llvm.h index ac577295911..8d3f671a029 100644 --- a/src/libre-soc/vulkan/libresoc_llvm.h +++ b/src/libre-soc/vulkan/libresoc_llvm.h @@ -3,17 +3,123 @@ #include +enum +{ + ADDR_SPACE_FLAT = 0, + ADDR_SPACE_GLOBAL = 1, + ADDR_SPACE_GDS = 2, + ADDR_SPACE_LDS = 3, + ADDR_SPACE_CONST = 4, + ADDR_SPACE_CONST_32BIT = 6, +}; + +enum func_attr +{ + FUNC_ATTR_ALWAYSINLINE = (1 << 0), + FUNC_ATTR_INREG = (1 << 2), + FUNC_ATTR_NOALIAS = (1 << 3), + FUNC_ATTR_NOUNWIND = (1 << 4), + FUNC_ATTR_READNONE = (1 << 5), + FUNC_ATTR_READONLY = (1 << 6), + FUNC_ATTR_WRITEONLY = (1 << 7), + FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8), + FUNC_ATTR_CONVERGENT = (1 << 9), + + /* Legacy intrinsic that needs attributes on function declarations + * and they must match the internal LLVM definition exactly, otherwise + * intrinsic selection fails. 
+ */ + FUNC_ATTR_LEGACY = (1u << 31), +}; + +enum target_machine_options +{ + TM_SUPPORTS_SPILL = (1 << 0), + TM_FORCE_ENABLE_XNACK = (1 << 1), + TM_FORCE_DISABLE_XNACK = (1 << 2), + TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3), + TM_CHECK_IR = (1 << 4), + TM_ENABLE_GLOBAL_ISEL = (1 << 5), + TM_CREATE_LOW_OPT = (1 << 6), + TM_WAVE32 = (1 << 7), +}; + +enum float_mode +{ + FLOAT_MODE_DEFAULT, + FLOAT_MODE_DEFAULT_OPENGL, + FLOAT_MODE_DENORM_FLUSH_TO_ZERO, +}; + struct nir_shader; +struct llvm_flow; -struct libresoc_llvm { - LLVMOrcJITStackRef orc_ref; +struct llvm_flow_state { + struct llvm_flow *stack; + unsigned depth_max; + unsigned depth; +}; + +typedef struct libresoc_llvm_context { LLVMContextRef context; LLVMBuilderRef builder; + LLVMModuleRef *module; + LLVMTypeRef voidt; + LLVMTypeRef i1; + LLVMTypeRef i8; + LLVMTypeRef i16; + LLVMTypeRef i32; + LLVMTypeRef i64; + LLVMTypeRef i128; + LLVMTypeRef intptr; + LLVMTypeRef f16; + LLVMTypeRef f32; + LLVMTypeRef f64; + LLVMTypeRef v2i16; + LLVMTypeRef v4i16; + LLVMTypeRef v2f16; + LLVMTypeRef v4f16; + LLVMTypeRef v2i32; + LLVMTypeRef v3i32; + LLVMTypeRef v4i32; + LLVMTypeRef v2f32; + LLVMTypeRef v3f32; + LLVMTypeRef v4f32; + LLVMTypeRef v8i32; + //LLVMTypeRef iN_wavemask; + //LLVMTypeRef iN_ballotmask; + + LLVMValueRef i8_0; + LLVMValueRef i8_1; + LLVMValueRef i16_0; + LLVMValueRef i16_1; + LLVMValueRef i32_0; + LLVMValueRef i32_1; + LLVMValueRef i64_0; + LLVMValueRef i64_1; + LLVMValueRef i128_0; + LLVMValueRef i128_1; + LLVMValueRef f16_0; + LLVMValueRef f16_1; + LLVMValueRef f32_0; + LLVMValueRef f32_1; + LLVMValueRef f64_0; + LLVMValueRef f64_1; + LLVMValueRef i1true; + LLVMValueRef i1false; + + struct llvm_flow_state *flow; + unsigned float_mode; +} libresoc_llvm_context; + +struct libresoc_llvm { + LLVMOrcJITStackRef orc_ref; + libresoc_llvm_context lc; }; void InitLLVM(struct libresoc_llvm *llvm_ref); void DestroyLLVM(struct libresoc_llvm *llvm_ref); -void libresoc_nir_translate(struct libresoc_llvm *llvm_ref, 
struct nir_shader *nir); +LLVMModuleRef libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_shader *nir); #endif diff --git a/src/libre-soc/vulkan/libresoc_llvm_build.c b/src/libre-soc/vulkan/libresoc_llvm_build.c new file mode 100644 index 00000000000..a7896fd8bf1 --- /dev/null +++ b/src/libre-soc/vulkan/libresoc_llvm_build.c @@ -0,0 +1,927 @@ +#include "libresoc_llvm_build.h" +#include "util/macros.h" +#include "util/bitscan.h" +#include +#include +#include +#include + + +/* Data for if/else/endif and bgnloop/endloop control flow structures. + */ +struct llvm_flow { + /* Loop exit or next part of if/else/endif. */ + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef loop_entry_block; +}; + +void enable_signed_zeros(struct libresoc_llvm_context *ctx) +{ + //TODO: this is in C++, need to convert this into C + // if (ctx->float_mode == FLOAT_MODE_DEFAULT_OPENGL) { + // llvm::FastMathFlags flags = ctx->b->getFastMathFlags(); + + // /* This disables the optimization of (x + 0), which is used + // * to convert negative zero to positive zero. 
+ // */ + // flags.setNoSignedZeros(false); + // ctx->b->setFastMathFlags(flags); + // } +} + +void disable_signed_zeros(struct libresoc_llvm_context *ctx) +{ + //TODO: this is in C++, need to convert this into C + // if (ctx->float_mode == FLOAT_MODE_DEFAULT_OPENGL) + // llvm::FastMathFlags flags = ctx->b->getFastMathFlags(); + + // flags.setNoSignedZeros(); + // ctx->b->setFastMathFlags(flags); + // } +} + +static const char *attr_to_str(enum func_attr attr) +{ + switch (attr) { + case FUNC_ATTR_ALWAYSINLINE: + return "alwaysinline"; + case FUNC_ATTR_INREG: + return "inreg"; + case FUNC_ATTR_NOALIAS: + return "noalias"; + case FUNC_ATTR_NOUNWIND: + return "nounwind"; + case FUNC_ATTR_READNONE: + return "readnone"; + case FUNC_ATTR_READONLY: + return "readonly"; + case FUNC_ATTR_WRITEONLY: + return "writeonly"; + case FUNC_ATTR_INACCESSIBLE_MEM_ONLY: + return "inaccessiblememonly"; + case FUNC_ATTR_CONVERGENT: + return "convergent"; + default: + fprintf(stderr, "Unhandled function attribute: %x\n", attr); + return 0; + } +} + +void add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx, + enum func_attr attr) +{ + const char *attr_name = attr_to_str(attr); + unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name)); + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0); + + if (LLVMIsAFunction(function)) + LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); + else + LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr); +} + +void add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask) +{ + attrib_mask |= FUNC_ATTR_NOUNWIND; + attrib_mask &= ~FUNC_ATTR_LEGACY; + + while (attrib_mask) { + enum func_attr attr = 1u << u_bit_scan(&attrib_mask); + add_function_attr(ctx, function, -1, attr); + } +} + +static struct llvm_flow *get_innermost_loop(struct libresoc_llvm_context *lc) +{ + for (unsigned i = lc->flow->depth; i > 0; --i) { + if (lc->flow->stack[i - 1].loop_entry_block) 
+ return &lc->flow->stack[i - 1]; + } + return NULL; +} + +static LLVMValueRef eliminate_negative_zero(struct libresoc_llvm_context *ctx, LLVMValueRef val) +{ + enable_signed_zeros(ctx); + /* (val + 0) converts negative zero to positive zero. */ + val = LLVMBuildFAdd(ctx->builder, val, LLVMConstNull(LLVMTypeOf(val)), ""); + disable_signed_zeros(ctx); + return val; +} + +void build_break(struct libresoc_llvm_context *lc) +{ + struct llvm_flow *flow = get_innermost_loop(lc); + LLVMBuildBr(lc->builder, flow->next_block); +} + +void build_continue(struct libresoc_llvm_context *lc) +{ + struct llvm_flow *flow = get_innermost_loop(lc); + LLVMBuildBr(lc->builder, flow->loop_entry_block); +} + +int get_llvm_num_components(LLVMValueRef value) +{ + LLVMTypeRef type = LLVMTypeOf(value); + unsigned num_components = + LLVMGetTypeKind(type) == LLVMVectorTypeKind ? LLVMGetVectorSize(type) : 1; + return num_components; +} + +LLVMValueRef llvm_extract_elem(struct libresoc_llvm_context *lc, LLVMValueRef value, int index) +{ + if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) { + assert(index == 0); + return value; + } + + return LLVMBuildExtractElement(lc->builder, value, LLVMConstInt(lc->i32, index, false), ""); +} + +int get_elem_bits(struct libresoc_llvm_context *lc, LLVMTypeRef type) +{ + if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) + type = LLVMGetElementType(type); + + if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind) + return LLVMGetIntTypeWidth(type); + + if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) { + if (LLVMGetPointerAddressSpace(type) == ADDR_SPACE_LDS) + return 32; + } + + if (type == lc->f16) + return 16; + if (type == lc->f32) + return 32; + if (type == lc->f64) + return 64; + + unreachable("Unhandled type kind in get_elem_bits"); +} + +/** + * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with + * intrinsic names). 
+ */ +void build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize) +{ + LLVMTypeRef elem_type = type; + + assert(bufsize >= 8); + + if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { + int ret = snprintf(buf, bufsize, "v%u", LLVMGetVectorSize(type)); + if (ret < 0) { + char *type_name = LLVMPrintTypeToString(type); + fprintf(stderr, "Error building type name for: %s\n", type_name); + LLVMDisposeMessage(type_name); + return; + } + elem_type = LLVMGetElementType(type); + buf += ret; + bufsize -= ret; + } + switch (LLVMGetTypeKind(elem_type)) { + default: + break; + case LLVMIntegerTypeKind: + snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type)); + break; + case LLVMHalfTypeKind: + snprintf(buf, bufsize, "f16"); + break; + case LLVMFloatTypeKind: + snprintf(buf, bufsize, "f32"); + break; + case LLVMDoubleTypeKind: + snprintf(buf, bufsize, "f64"); + break; + } +} + +static LLVMTypeRef to_integer_type_scalar(struct libresoc_llvm_context *lc, LLVMTypeRef t) +{ + if (t == lc->i8) + return lc->i8; + else if (t == lc->f16 || t == lc->i16) + return lc->i16; + else if (t == lc->f32 || t == lc->i32) + return lc->i32; + else if (t == lc->f64 || t == lc->i64) + return lc->i64; + else + unreachable("Unhandled integer size"); +} + +LLVMTypeRef to_integer_type(struct libresoc_llvm_context *lc, LLVMTypeRef t) +{ + if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { + LLVMTypeRef elem_type = LLVMGetElementType(t); + return LLVMVectorType(to_integer_type_scalar(lc, elem_type), LLVMGetVectorSize(t)); + } + if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) { + switch (LLVMGetPointerAddressSpace(t)) { + case ADDR_SPACE_GLOBAL: + return lc->i64; + case ADDR_SPACE_CONST_32BIT: + case ADDR_SPACE_LDS: + return lc->i32; + default: + unreachable("unhandled address space"); + } + } + return to_integer_type_scalar(lc, t); +} + +LLVMValueRef to_integer(struct libresoc_llvm_context *lc, LLVMValueRef v) +{ + LLVMTypeRef type = LLVMTypeOf(v); + if (LLVMGetTypeKind(type) == 
LLVMPointerTypeKind) { + return LLVMBuildPtrToInt(lc->builder, v, to_integer_type(lc, type), ""); + } + return LLVMBuildBitCast(lc->builder, v, to_integer_type(lc, type), ""); +} + +LLVMValueRef to_integer_or_pointer(struct libresoc_llvm_context *lc, LLVMValueRef v) +{ + LLVMTypeRef type = LLVMTypeOf(v); + if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) + return v; + return to_integer(lc, v); +} + +static LLVMTypeRef to_float_type_scalar(struct libresoc_llvm_context *lc, LLVMTypeRef t) +{ + if (t == lc->i8) + return lc->i8; + else if (t == lc->i16 || t == lc->f16) + return lc->f16; + else if (t == lc->i32 || t == lc->f32) + return lc->f32; + else if (t == lc->i64 || t == lc->f64) + return lc->f64; + else + unreachable("Unhandled float size"); +} + +LLVMTypeRef to_float_type(struct libresoc_llvm_context *lc, LLVMTypeRef t) +{ + if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { + LLVMTypeRef elem_type = LLVMGetElementType(t); + return LLVMVectorType(to_float_type_scalar(lc, elem_type), LLVMGetVectorSize(t)); + } + return to_float_type_scalar(lc, t); +} + +LLVMValueRef to_float(struct libresoc_llvm_context *lc, LLVMValueRef v) +{ + LLVMTypeRef type = LLVMTypeOf(v); + return LLVMBuildBitCast(lc->builder, v, to_float_type(lc, type), ""); +} + +unsigned get_type_size(LLVMTypeRef type) +{ + LLVMTypeKind kind = LLVMGetTypeKind(type); + + switch (kind) { + case LLVMIntegerTypeKind: + return LLVMGetIntTypeWidth(type) / 8; + case LLVMHalfTypeKind: + return 2; + case LLVMFloatTypeKind: + return 4; + case LLVMDoubleTypeKind: + return 8; + case LLVMPointerTypeKind: + if (LLVMGetPointerAddressSpace(type) == ADDR_SPACE_CONST_32BIT) + return 4; + return 8; + case LLVMVectorTypeKind: + return LLVMGetVectorSize(type) * get_type_size(LLVMGetElementType(type)); + case LLVMArrayTypeKind: + return LLVMGetArrayLength(type) * get_type_size(LLVMGetElementType(type)); + default: + assert(0); + return 0; + } +} + +LLVMValueRef build_intrinsic(struct libresoc_llvm_context *lc, const char 
*name, + LLVMTypeRef return_type, LLVMValueRef *params, unsigned param_count, + unsigned attrib_mask) +{ + LLVMValueRef function, call; + bool set_callsite_attrs = !(attrib_mask & FUNC_ATTR_LEGACY); + + function = LLVMGetNamedFunction(*(lc->module), name); + if (!function) { + LLVMTypeRef param_types[32], function_type; + unsigned i; + + assert(param_count <= 32); + + for (i = 0; i < param_count; ++i) { + assert(params[i]); + param_types[i] = LLVMTypeOf(params[i]); + } + function_type = LLVMFunctionType(return_type, param_types, param_count, 0); + function = LLVMAddFunction(*(lc->module), name, function_type); + + LLVMSetFunctionCallConv(function, LLVMCCallConv); + LLVMSetLinkage(function, LLVMExternalLinkage); + + if (!set_callsite_attrs) + add_func_attributes(lc->context, function, attrib_mask); + } + + call = LLVMBuildCall(lc->builder, function, params, param_count, ""); + if (set_callsite_attrs) + add_func_attributes(lc->context, call, attrib_mask); + return call; +} + +LLVMValueRef build_canonicalize(struct libresoc_llvm_context *lc, LLVMValueRef src0, unsigned bitsize) +{ + LLVMTypeRef type; + char *intr; + + if (bitsize == 16) { + intr = "llvm.canonicalize.f16"; + type = lc->f16; + } else if (bitsize == 32) { + intr = "llvm.canonicalize.f32"; + type = lc->f32; + } else { + intr = "llvm.canonicalize.f64"; + type = lc->f64; + } + + LLVMValueRef params[] = { + src0, + }; + return build_intrinsic(lc, intr, type, params, 1, FUNC_ATTR_READNONE); +} + +LLVMValueRef build_alloca_undef(struct libresoc_llvm_context *lc, LLVMTypeRef type, const char *name) +{ + LLVMBuilderRef builder = lc->builder; + LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); + LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); + LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(lc->context); + LLVMValueRef res; + + if 
(first_instr) { + LLVMPositionBuilderBefore(first_builder, first_instr); + } else { + LLVMPositionBuilderAtEnd(first_builder, first_block); + } + + res = LLVMBuildAlloca(first_builder, type, name); + LLVMDisposeBuilder(first_builder); + return res; +} + +LLVMValueRef build_gep_ptr(struct libresoc_llvm_context *lc, LLVMValueRef base_ptr, + LLVMValueRef index) +{ + return LLVMBuildGEP(lc->builder, base_ptr, &index, 1, ""); +} + +LLVMValueRef build_gep0(struct libresoc_llvm_context *lc, LLVMValueRef base_ptr, LLVMValueRef index) +{ + LLVMValueRef indices[2] = { + lc->i32_0, + index, + }; + return LLVMBuildGEP(lc->builder, base_ptr, indices, 2, ""); +} + +void build_sendmsg(struct libresoc_llvm_context *lc, uint32_t msg, LLVMValueRef wave_id) +{ + LLVMValueRef args[2]; + args[0] = LLVMConstInt(lc->i32, msg, false); + args[1] = wave_id; + build_intrinsic(lc, "llvm.amdgcn.s.sendmsg", lc->voidt, args, 2, 0); +} + +LLVMValueRef build_imsb(struct libresoc_llvm_context *lc, LLVMValueRef arg, LLVMTypeRef dst_type) +{ + LLVMValueRef msb = + build_intrinsic(lc, "llvm.amdgcn.sffbh.i32", dst_type, &arg, 1, FUNC_ATTR_READNONE); + + /* The HW returns the last bit index from MSB, but NIR/TGSI wants + * the index from LSB. Invert it by doing "31 - msb". 
*/ + msb = LLVMBuildSub(lc->builder, LLVMConstInt(lc->i32, 31, false), msb, ""); + + LLVMValueRef all_ones = LLVMConstInt(lc->i32, -1, true); + LLVMValueRef cond = + LLVMBuildOr(lc->builder, LLVMBuildICmp(lc->builder, LLVMIntEQ, arg, lc->i32_0, ""), + LLVMBuildICmp(lc->builder, LLVMIntEQ, arg, all_ones, ""), ""); + + return LLVMBuildSelect(lc->builder, cond, all_ones, msb, ""); +} + +LLVMValueRef build_umsb(struct libresoc_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type) +{ + const char *intrin_name; + LLVMTypeRef type; + LLVMValueRef highest_bit; + LLVMValueRef zero; + unsigned bitsize; + + bitsize = get_elem_bits(ctx, LLVMTypeOf(arg)); + switch (bitsize) { + case 64: + intrin_name = "llvm.ctlz.i64"; + type = ctx->i64; + highest_bit = LLVMConstInt(ctx->i64, 63, false); + zero = ctx->i64_0; + break; + case 32: + intrin_name = "llvm.ctlz.i32"; + type = ctx->i32; + highest_bit = LLVMConstInt(ctx->i32, 31, false); + zero = ctx->i32_0; + break; + case 16: + intrin_name = "llvm.ctlz.i16"; + type = ctx->i16; + highest_bit = LLVMConstInt(ctx->i16, 15, false); + zero = ctx->i16_0; + break; + case 8: + intrin_name = "llvm.ctlz.i8"; + type = ctx->i8; + highest_bit = LLVMConstInt(ctx->i8, 7, false); + zero = ctx->i8_0; + break; + default: + unreachable(!"invalid bitsize"); + break; + } + + LLVMValueRef params[2] = { + arg, + ctx->i1true, + }; + + LLVMValueRef msb = build_intrinsic(ctx, intrin_name, type, params, 2, FUNC_ATTR_READNONE); + + /* The HW returns the last bit index from MSB, but TGSI/NIR wants + * the index from LSB. Invert it by doing "31 - msb". 
*/ + msb = LLVMBuildSub(ctx->builder, highest_bit, msb, ""); + + if (bitsize == 64) { + msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, ""); + } else if (bitsize < 32) { + msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, ""); + } + + /* check for zero */ + return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""), + LLVMConstInt(ctx->i32, -1, true), msb, ""); +} + +LLVMValueRef build_fmin(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + char name[64], type[64]; + + build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type)); + snprintf(name, sizeof(name), "llvm.minnum.%s", type); + LLVMValueRef args[2] = {a, b}; + return build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, FUNC_ATTR_READNONE); +} + +LLVMValueRef build_fmax(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + char name[64], type[64]; + + build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type)); + snprintf(name, sizeof(name), "llvm.maxnum.%s", type); + LLVMValueRef args[2] = {a, b}; + return build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, FUNC_ATTR_READNONE); +} + +LLVMValueRef build_imin(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, ""); + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); +} + +LLVMValueRef build_imax(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, ""); + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); +} + +LLVMValueRef build_umin(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, ""); + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); +} + +LLVMValueRef build_umax(struct libresoc_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) +{ + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, ""); + return 
LLVMBuildSelect(ctx->builder, cmp, a, b, ""); +} + +LLVMValueRef build_clamp(struct libresoc_llvm_context *ctx, LLVMValueRef value) +{ + LLVMTypeRef t = LLVMTypeOf(value); + return build_fmin(ctx, build_fmax(ctx, value, LLVMConstReal(t, 0.0)), + LLVMConstReal(t, 1.0)); +} + +LLVMValueRef build_gather_values_extended(struct libresoc_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count, unsigned value_stride, bool load, + bool always_vector) +{ + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef vec = NULL; + unsigned i; + + if (value_count == 1 && !always_vector) { + if (load) + return LLVMBuildLoad(builder, values[0], ""); + return values[0]; + } else if (!value_count) + unreachable("value_count is 0"); + + for (i = 0; i < value_count; i++) { + LLVMValueRef value = values[i * value_stride]; + if (load) + value = LLVMBuildLoad(builder, value, ""); + + if (!i) + vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count)); + LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); + vec = LLVMBuildInsertElement(builder, vec, value, index, ""); + } + return vec; +} + +LLVMValueRef build_gather_values(struct libresoc_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count) +{ + return build_gather_values_extended(ctx, values, value_count, 1, false, false); +} + +LLVMValueRef build_varying_gather_values(struct libresoc_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count, unsigned component) +{ + LLVMValueRef vec = NULL; + + if (value_count == 1) { + return values[component]; + } else if (!value_count) + unreachable("value_count is 0"); + + for (unsigned i = component; i < value_count + component; i++) { + LLVMValueRef value = values[i]; + + if (i == component) + vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count)); + LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false); + vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, ""); + } + return vec; +} + +LLVMValueRef build_fdiv(struct 
libresoc_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den) +{ + unsigned type_size = get_type_size(LLVMTypeOf(den)); + const char *name; + + /* For doubles, we need precise division to pass GLCTS. */ + if (ctx->float_mode == FLOAT_MODE_DEFAULT_OPENGL && type_size == 8) + return LLVMBuildFDiv(ctx->builder, num, den, ""); + + if (type_size == 2) + name = "llvm.amdgcn.rcp.f16"; + else if (type_size == 4) + name = "llvm.amdgcn.rcp.f32"; + else + name = "llvm.amdgcn.rcp.f64"; + + LLVMValueRef rcp = + build_intrinsic(ctx, name, LLVMTypeOf(den), &den, 1, FUNC_ATTR_READNONE); + + return LLVMBuildFMul(ctx->builder, num, rcp, ""); +} + +LLVMValueRef const_uint_vec(struct libresoc_llvm_context *ctx, LLVMTypeRef type, uint64_t value) +{ + + if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { + LLVMValueRef scalar = LLVMConstInt(LLVMGetElementType(type), value, 0); + unsigned vec_size = LLVMGetVectorSize(type); + LLVMValueRef *scalars = alloca(vec_size * sizeof(LLVMValueRef *)); + + for (unsigned i = 0; i < vec_size; i++) + scalars[i] = scalar; + return LLVMConstVector(scalars, vec_size); + } + return LLVMConstInt(type, value, 0); +} + +LLVMValueRef build_isign(struct libresoc_llvm_context *ctx, LLVMValueRef src0) +{ + LLVMTypeRef type = LLVMTypeOf(src0); + LLVMValueRef val; + + /* v_med3 is selected only when max is first. (LLVM bug?) 
*/ + val = build_imax(ctx, src0, const_uint_vec(ctx, type, -1)); + return build_imin(ctx, val, const_uint_vec(ctx, type, 1)); +} + +LLVMValueRef build_fsign(struct libresoc_llvm_context *ctx, LLVMValueRef src) +{ + LLVMTypeRef type = LLVMTypeOf(src); + LLVMValueRef pos, neg, dw[2], val; + unsigned bitsize = get_elem_bits(ctx, type); + + /* The standard version leads to this: + * v_cmp_ngt_f32_e64 s[0:1], s4, 0 ; D40B0000 00010004 + * v_cndmask_b32_e64 v4, 1.0, s4, s[0:1] ; D5010004 000008F2 + * v_cmp_le_f32_e32 vcc, 0, v4 ; 7C060880 + * v_cndmask_b32_e32 v4, -1.0, v4, vcc ; 020808F3 + * + * The isign version: + * v_add_f32_e64 v4, s4, 0 ; D5030004 00010004 + * v_med3_i32 v4, v4, -1, 1 ; D5580004 02058304 + * v_cvt_f32_i32_e32 v4, v4 ; 7E080B04 + * + * (src0 + 0) converts negative zero to positive zero. + * After that, int(fsign(x)) == isign(floatBitsToInt(x)). + * + * For FP64, use the standard version, which doesn't suffer from the huge DP rate + * reduction. (FP64 comparisons are as fast as int64 comparisons) + */ + if (bitsize == 16 || bitsize == 32) { + val = to_integer(ctx, eliminate_negative_zero(ctx, src)); + val = build_isign(ctx, val); + return LLVMBuildSIToFP(ctx->builder, val, type, ""); + } + + assert(bitsize == 64); + pos = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src, ctx->f64_0, ""); + neg = LLVMBuildFCmp(ctx->builder, LLVMRealOLT, src, ctx->f64_0, ""); + dw[0] = ctx->i32_0; + dw[1] = LLVMBuildSelect( + ctx->builder, pos, LLVMConstInt(ctx->i32, 0x3FF00000, 0), + LLVMBuildSelect(ctx->builder, neg, LLVMConstInt(ctx->i32, 0xBFF00000, 0), ctx->i32_0, ""), + ""); + return LLVMBuildBitCast(ctx->builder, build_gather_values(ctx, dw, 2), ctx->f64, ""); +} + +LLVMValueRef build_bitfield_reverse(struct libresoc_llvm_context *ctx, LLVMValueRef src0) +{ + LLVMValueRef result; + unsigned bitsize; + + bitsize = get_elem_bits(ctx, LLVMTypeOf(src0)); + + switch (bitsize) { + case 64: + result = build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64, 
(LLVMValueRef[]){src0}, 1, + FUNC_ATTR_READNONE); + + result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); + break; + case 32: + result = build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, (LLVMValueRef[]){src0}, 1, + FUNC_ATTR_READNONE); + break; + case 16: + result = build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16, (LLVMValueRef[]){src0}, 1, + FUNC_ATTR_READNONE); + + result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); + break; + case 8: + result = build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8, (LLVMValueRef[]){src0}, 1, + FUNC_ATTR_READNONE); + + result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); + break; + default: + unreachable(!"invalid bitsize"); + break; + } + + return result; +} + +LLVMValueRef build_bit_count(struct libresoc_llvm_context *ctx, LLVMValueRef src0) +{ + LLVMValueRef result; + unsigned bitsize; + + bitsize = get_elem_bits(ctx, LLVMTypeOf(src0)); + + switch (bitsize) { + case 128: + result = build_intrinsic(ctx, "llvm.ctpop.i128", ctx->i128, (LLVMValueRef[]){src0}, 1, + FUNC_ATTR_READNONE); + result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); + break; + case 64: + result = build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64, (LLVMValueRef[]){src0}, 1, + FUNC_ATTR_READNONE); + + result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, ""); + break; + case 32: + result = build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, (LLVMValueRef[]){src0}, 1, + FUNC_ATTR_READNONE); + break; + case 16: + result = build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16, (LLVMValueRef[]){src0}, 1, + FUNC_ATTR_READNONE); + + result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); + break; + case 8: + result = build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8, (LLVMValueRef[]){src0}, 1, + FUNC_ATTR_READNONE); + + result = LLVMBuildZExt(ctx->builder, result, ctx->i32, ""); + break; + default: + unreachable(!"invalid bitsize"); + break; + } + + return result; +} + +LLVMValueRef build_bfe(struct libresoc_llvm_context *ctx, 
LLVMValueRef input, LLVMValueRef offset, + LLVMValueRef width, bool is_signed) +{ + LLVMValueRef args[] = { + input, + offset, + width, + }; + + return build_intrinsic(ctx, is_signed ? "llvm.amdgcn.sbfe.i32" : "llvm.amdgcn.ubfe.i32", + ctx->i32, args, 3, FUNC_ATTR_READNONE); +} + +LLVMValueRef find_lsb(struct libresoc_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0) +{ + unsigned src0_bitsize = get_elem_bits(ctx, LLVMTypeOf(src0)); + const char *intrin_name; + LLVMTypeRef type; + LLVMValueRef zero; + + switch (src0_bitsize) { + case 64: + intrin_name = "llvm.cttz.i64"; + type = ctx->i64; + zero = ctx->i64_0; + break; + case 32: + intrin_name = "llvm.cttz.i32"; + type = ctx->i32; + zero = ctx->i32_0; + break; + case 16: + intrin_name = "llvm.cttz.i16"; + type = ctx->i16; + zero = ctx->i16_0; + break; + case 8: + intrin_name = "llvm.cttz.i8"; + type = ctx->i8; + zero = ctx->i8_0; + break; + default: + unreachable(!"invalid bitsize"); + } + + LLVMValueRef params[2] = { + src0, + + /* The value of 1 means that ffs(x=0) = undef, so LLVM won't + * add special code to check for x=0. The reason is that + * the LLVM behavior for x=0 is different from what we + * need here. However, LLVM also assumes that ffs(x) is + * in [0, 31], but GLSL expects that ffs(0) = -1, so + * a conditional assignment to handle 0 is still required. + * + * The hardware already implements the correct behavior. + */ + ctx->i1true, + }; + + LLVMValueRef lsb = build_intrinsic(ctx, intrin_name, type, params, 2, FUNC_ATTR_READNONE); + + if (src0_bitsize == 64) { + lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, ""); + } else if (src0_bitsize < 32) { + lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, ""); + } + + /* TODO: We need an intrinsic to skip this conditional. 
*/ + /* Check for zero: */ + return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0, zero, ""), + LLVMConstInt(ctx->i32, -1, 0), lsb, ""); +} + +LLVMValueRef build_image_get_sample_count(struct libresoc_llvm_context *ctx, LLVMValueRef rsrc) +{ + LLVMValueRef samples; + + /* Read the samples from the descriptor directly. + * Hardware doesn't have any instruction for this. + */ + samples = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 3, 0), ""); + samples = LLVMBuildLShr(ctx->builder, samples, LLVMConstInt(ctx->i32, 16, 0), ""); + samples = LLVMBuildAnd(ctx->builder, samples, LLVMConstInt(ctx->i32, 0xf, 0), ""); + samples = LLVMBuildShl(ctx->builder, ctx->i32_1, samples, ""); + return samples; +} + +LLVMValueRef build_cvt_pkrtz_f16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2]) +{ + return build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", ctx->v2f16, args, 2, + FUNC_ATTR_READNONE); +} + +LLVMValueRef build_cvt_pknorm_i16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2]) +{ + LLVMValueRef res = build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16", ctx->v2i16, args, 2, + FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); +} + +LLVMValueRef build_cvt_pknorm_u16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2]) +{ + LLVMValueRef res = build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16", ctx->v2i16, args, 2, + FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); +} + +/* The 8-bit and 10-bit clamping is for HW workarounds. */ +LLVMValueRef build_cvt_pk_i16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, + bool hi) +{ + assert(bits == 8 || bits == 10 || bits == 16); + + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0); + LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0); + LLVMValueRef max_alpha = bits != 10 ? 
max_rgb : ctx->i32_1; + LLVMValueRef min_alpha = bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0); + + /* Clamp. */ + if (bits != 16) { + for (int i = 0; i < 2; i++) { + bool alpha = hi && i == 1; + args[i] = build_imin(ctx, args[i], alpha ? max_alpha : max_rgb); + args[i] = build_imax(ctx, args[i], alpha ? min_alpha : min_rgb); + } + } + + LLVMValueRef res = + build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", ctx->v2i16, args, 2, FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); +} + +/* The 8-bit and 10-bit clamping is for HW workarounds. */ +LLVMValueRef build_cvt_pk_u16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, + bool hi) +{ + assert(bits == 8 || bits == 10 || bits == 16); + + LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0); + LLVMValueRef max_alpha = bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0); + + /* Clamp. */ + if (bits != 16) { + for (int i = 0; i < 2; i++) { + bool alpha = hi && i == 1; + args[i] = build_umin(ctx, args[i], alpha ? 
max_alpha : max_rgb); + } + } + + LLVMValueRef res = + build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", ctx->v2i16, args, 2, FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); +} + +LLVMValueRef build_wqm_vote(struct libresoc_llvm_context *ctx, LLVMValueRef i1) +{ + return build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, &i1, 1, FUNC_ATTR_READNONE); +} + +void build_kill_if_false(struct libresoc_llvm_context *ctx, LLVMValueRef i1) +{ + build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, &i1, 1, 0); +} diff --git a/src/libre-soc/vulkan/libresoc_llvm_build.h b/src/libre-soc/vulkan/libresoc_llvm_build.h new file mode 100644 index 00000000000..32f27cbaff2 --- /dev/null +++ b/src/libre-soc/vulkan/libresoc_llvm_build.h @@ -0,0 +1,93 @@ +#ifndef LIBRESOC_LLVM_BUILD_H +#define LIBRESOC_LLVM_BUILD_H + +#include "libresoc_llvm.h" +#include +#include + +void enable_signed_zeros(struct libresoc_llvm_context *ctx); +void disable_signed_zeros(struct libresoc_llvm_context *ctx); +void add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx, + enum func_attr attr); +void add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask); +void build_break(struct libresoc_llvm_context *lc); +void build_continue(struct libresoc_llvm_context *lc); +LLVMValueRef build_alloca_undef(struct libresoc_llvm_context *lc, LLVMTypeRef type, const char *name); + +LLVMValueRef build_gep_ptr(struct libresoc_llvm_context *lc, LLVMValueRef base_ptr, + LLVMValueRef index); +LLVMValueRef build_gep0(struct libresoc_llvm_context *lc, LLVMValueRef base_ptr, LLVMValueRef index); +LLVMValueRef build_gather_values_extended(struct libresoc_llvm_context *lc, LLVMValueRef *values, unsigned value_count, unsigned value_stride, bool load, bool always_vector); + +LLVMValueRef build_varying_gather_values(struct libresoc_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count, unsigned component); +LLVMTypeRef to_integer_type(struct 
libresoc_llvm_context *lc, LLVMTypeRef t); +LLVMValueRef to_integer(struct libresoc_llvm_context *lc, LLVMValueRef v); +LLVMValueRef to_integer_or_pointer(struct libresoc_llvm_context *lc, LLVMValueRef v); +LLVMTypeRef to_float_type(struct libresoc_llvm_context *lc, LLVMTypeRef t); +LLVMValueRef to_float(struct libresoc_llvm_context *lc, LLVMValueRef v); +LLVMValueRef build_intrinsic(struct libresoc_llvm_context *lc, const char *name, + LLVMTypeRef return_type, LLVMValueRef *params, unsigned param_count, + unsigned attrib_mask); +LLVMValueRef build_canonicalize(struct libresoc_llvm_context *lc, LLVMValueRef src0, unsigned bitsize); +unsigned get_type_size(LLVMTypeRef type); +int get_llvm_num_components(LLVMValueRef value); + +void build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize); +int get_elem_bits(struct libresoc_llvm_context *lc, LLVMTypeRef type); + +LLVMValueRef llvm_extract_elem(struct libresoc_llvm_context *lc, LLVMValueRef value, int index); + +LLVMValueRef build_gather_values_extended(struct libresoc_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count, unsigned value_stride, bool load, + bool always_vector); +LLVMValueRef build_gather_values(struct libresoc_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count); +LLVMValueRef build_fdiv(struct libresoc_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den); + +LLVMValueRef build_fsign(struct libresoc_llvm_context *ctx, LLVMValueRef src); +LLVMValueRef build_isign(struct libresoc_llvm_context *ctx, LLVMValueRef src0); +LLVMValueRef const_uint_vec(struct libresoc_llvm_context *ctx, LLVMTypeRef type, uint64_t value); + +LLVMValueRef build_bitfield_reverse(struct libresoc_llvm_context *ctx, LLVMValueRef src0); +LLVMValueRef build_bit_count(struct libresoc_llvm_context *ctx, LLVMValueRef src0); +LLVMValueRef build_bfe(struct libresoc_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset, + LLVMValueRef width, bool is_signed); +LLVMValueRef find_lsb(struct 
libresoc_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0); +#define SENDMSG_GS 2 +#define SENDMSG_GS_DONE 3 +#define SENDMSG_GS_ALLOC_REQ 9 + +#define SENDMSG_GS_OP_NOP (0 << 4) +#define SENDMSG_GS_OP_CUT (1 << 4) +#define SENDMSG_GS_OP_EMIT (2 << 4) +#define SENDMSG_GS_OP_EMIT_CUT (3 << 4) + +void build_sendmsg(struct libresoc_llvm_context *lc, uint32_t msg, LLVMValueRef wave_id); + +LLVMValueRef build_imsb(struct libresoc_llvm_context *lc, LLVMValueRef arg, LLVMTypeRef dst_type); + +LLVMValueRef build_umsb(struct libresoc_llvm_context *lc, LLVMValueRef arg, LLVMTypeRef dst_type); +LLVMValueRef build_fmin(struct libresoc_llvm_context *lc, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef build_fmax(struct libresoc_llvm_context *lc, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef build_imin(struct libresoc_llvm_context *lc, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef build_imax(struct libresoc_llvm_context *lc, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef build_umin(struct libresoc_llvm_context *lc, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef build_umax(struct libresoc_llvm_context *lc, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef build_clamp(struct libresoc_llvm_context *lc, LLVMValueRef value); + +#define AC_TID_MASK_TOP_LEFT 0xfffffffc +#define AC_TID_MASK_TOP 0xfffffffd +#define AC_TID_MASK_LEFT 0xfffffffe + +LLVMValueRef build_image_get_sample_count(struct libresoc_llvm_context *ctx, LLVMValueRef rsrc); +LLVMValueRef build_cvt_pkrtz_f16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2]); +LLVMValueRef build_cvt_pknorm_i16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2]); +LLVMValueRef build_cvt_pknorm_u16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2]); +LLVMValueRef build_cvt_pk_i16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, + bool hi); +LLVMValueRef build_cvt_pk_u16(struct libresoc_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, + bool hi); +LLVMValueRef build_wqm_vote(struct 
libresoc_llvm_context *ctx, LLVMValueRef i1); +void build_kill_if_false(struct libresoc_llvm_context *ctx, LLVMValueRef i1); +#endif diff --git a/src/libre-soc/vulkan/libresoc_pipeline.c b/src/libre-soc/vulkan/libresoc_pipeline.c index 9fde0ae31a5..097f37b6721 100644 --- a/src/libre-soc/vulkan/libresoc_pipeline.c +++ b/src/libre-soc/vulkan/libresoc_pipeline.c @@ -61,6 +61,7 @@ VkResult libresoc_create_shaders(struct libresoc_pipeline *pipeline, flags, subgroup_size, ballot_bit_size); + modules[i]->llvm_module = libresoc_nir_translate(&device->instance->llvm_ref, nir[i]); /* We don't want to alter meta shaders IR directly so clone it * first. */ diff --git a/src/libre-soc/vulkan/libresoc_shader.c b/src/libre-soc/vulkan/libresoc_shader.c index d74b6b13d07..533daa893f6 100644 --- a/src/libre-soc/vulkan/libresoc_shader.c +++ b/src/libre-soc/vulkan/libresoc_shader.c @@ -94,6 +94,17 @@ libresoc_dump_nir_shaders(struct nir_shader * const *shaders, return ret; } +static void +shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align) +{ + assert(glsl_type_is_vector_or_scalar(type)); + + uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8; + unsigned length = glsl_get_vector_elements(type); + *size = comp_size * length, + *align = comp_size; +} + nir_shader * libresoc_shader_compile_to_nir(struct libresoc_device *device, struct libresoc_shader_module *module, @@ -162,10 +173,160 @@ libresoc_shader_compile_to_nir(struct libresoc_device *device, assert(nir->info.stage == stage); nir_validate_shader(nir, "after spirv_to_nir"); + free(spec_entries); + + /* We have to lower away local constant initializers right before we + * inline functions. That way they get properly initialized at the top + * of the function and not at the top of its caller. 
+ */ + NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS_V(nir, nir_lower_returns); + NIR_PASS_V(nir, nir_inline_functions); + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_deref); + + /* Pick off the single entrypoint that we want */ + /* TODO: enable following code if I know what it is doing + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func->is_entrypoint) + func->name = ralloc_strdup(func, "main"); + else + exec_node_remove(&func->node); + } + assert(exec_list_length(&nir->functions) == 1); + */ + + /* Make sure we lower constant initializers on output variables so that + * nir_remove_dead_variables below sees the corresponding stores + */ + NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out); + + /* Now that we've deleted all but the main function, we can go ahead and + * lower the rest of the constant initializers. + */ + NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); + + /* Split member structs. We do this before lower_io_to_temporaries so that + * it doesn't lower system values to temporaries by accident. 
+ */ + NIR_PASS_V(nir, nir_split_var_copies); + NIR_PASS_V(nir, nir_split_per_member_structs); + + if (nir->info.stage == MESA_SHADER_FRAGMENT) + NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out); + if (nir->info.stage == MESA_SHADER_FRAGMENT) + NIR_PASS_V(nir, nir_lower_input_attachments, + &(nir_input_attachment_options) { + .use_fragcoord_sysval = true, + .use_layer_id_sysval = false, + }); + + NIR_PASS_V(nir, nir_remove_dead_variables, + nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared, + NULL); + + NIR_PASS_V(nir, nir_propagate_invariant); + + NIR_PASS_V(nir, nir_lower_system_values); + NIR_PASS_V(nir, nir_lower_compute_system_values, NULL); + + NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); + + // if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE) + // NIR_PASS_V(nir, nir_lower_discard_to_demote); + + nir_lower_doubles_options lower_doubles = + nir->options->lower_doubles_options; + //TODO: if required enable following + //lower_doubles |= nir_lower_dfloor; + + + NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles); + + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + if (nir->info.stage == MESA_SHADER_GEOMETRY) + nir_lower_gs_intrinsics(nir, true); + + static const nir_lower_tex_options tex_options = { + .lower_txp = ~0, + .lower_tg4_offsets = true, + }; + + nir_lower_tex(nir, &tex_options); + + nir_lower_vars_to_ssa(nir); + + if (nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_GEOMETRY || + nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(nir), true, true); + } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) { + NIR_PASS_V(nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(nir), true, false); + } + + nir_split_var_copies(nir); + + nir_lower_global_vars_to_local(nir); + 
nir_remove_dead_variables(nir, nir_var_function_temp, NULL); + // bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7; + // nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) { + // .subgroup_size = subgroup_size, + // .ballot_bit_size = ballot_bit_size, + // .lower_to_scalar = 1, + // .lower_subgroup_masks = 1, + // .lower_shuffle = 1, + // .lower_shuffle_to_32bit = 1, + // .lower_vote_eq_to_ballot = 1, + // .lower_quad_broadcast_dynamic = 1, + // .lower_quad_broadcast_dynamic_to_const = gfx7minus, + // .lower_shuffle_to_swizzle_amd = 1, + // }); + + nir_lower_load_const_to_scalar(nir); + + // if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) + // radv_optimize_nir(nir, false, true); + + /* call radv_nir_lower_ycbcr_textures() late as there might still be + * tex with undef texture/sampler before first optimization */ + // NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout); + + /* We call nir_lower_var_copies() after the first radv_optimize_nir() + * to remove any copies introduced by nir_opt_find_array_copies(). + */ + nir_lower_var_copies(nir); + + /* Lower deref operations for compute shared memory. */ + if (nir->info.stage == MESA_SHADER_COMPUTE) { + NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, + nir_var_mem_shared, shared_var_info); + NIR_PASS_V(nir, nir_lower_explicit_io, + nir_var_mem_shared, nir_address_format_32bit_offset); + } + + /* Lower large variables that are always constant with load_constant + * intrinsics, which get turned into PC-relative loads from a data + * section next to the shader. + */ + NIR_PASS_V(nir, nir_opt_large_constants, + glsl_get_natural_size_align_bytes, 16); + + /* Indirect lowering must be called after the radv_optimize_nir() loop + * has been called at least once. Otherwise indirect lowering can + * bloat the instruction count of the loop and cause it to be + * considered too large for unrolling. 
+ */ + // ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class); + // radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false); + if (device->instance->debug_flags & LIBRESOC_DEBUG_DUMP_NIR) nir_print_shader(nir, stderr); - libresoc_nir_translate(&device->instance->llvm_ref, nir); - free(spec_entries); } return nir; } diff --git a/src/libre-soc/vulkan/libresoc_shader.h b/src/libre-soc/vulkan/libresoc_shader.h index 14164ad3d1a..9d5a407d595 100644 --- a/src/libre-soc/vulkan/libresoc_shader.h +++ b/src/libre-soc/vulkan/libresoc_shader.h @@ -48,6 +48,7 @@ struct libresoc_shader_module { unsigned char sha1[20]; uint32_t size; char data[0]; + LLVMModuleRef llvm_module; }; nir_shader * diff --git a/src/libre-soc/vulkan/libresoc_shader_args.c b/src/libre-soc/vulkan/libresoc_shader_args.c new file mode 100644 index 00000000000..90daa8fde3d --- /dev/null +++ b/src/libre-soc/vulkan/libresoc_shader_args.c @@ -0,0 +1,32 @@ + +#include "libresoc_shader_args.h" + +#include "nir/nir_builder.h" + +void add_arg(struct shader_args *info, enum arg_regfile regfile, unsigned size, + enum arg_type type, struct arg *arg) +{ + assert(info->arg_count < MAX_ARGS); + + unsigned offset; + if (regfile == ARG_SGPR) { + offset = info->num_sgprs_used; + info->num_sgprs_used += size; + } else { + assert(regfile == ARG_VGPR); + offset = info->num_vgprs_used; + info->num_vgprs_used += size; + } + + info->args[info->arg_count].file = regfile; + info->args[info->arg_count].offset = offset; + info->args[info->arg_count].size = size; + info->args[info->arg_count].type = type; + + if (arg) { + arg->arg_index = info->arg_count; + arg->used = true; + } + + info->arg_count++; +} diff --git a/src/libre-soc/vulkan/libresoc_shader_args.h b/src/libre-soc/vulkan/libresoc_shader_args.h new file mode 100644 index 00000000000..129bd38b0d2 --- /dev/null +++ b/src/libre-soc/vulkan/libresoc_shader_args.h @@ -0,0 +1,90 @@ +#ifndef LIBRESOC_SHADER_ARGS_H +#define 
LIBRESOC_SHADER_ARGS_H + +#include +#include + +#define MAX_INLINE_PUSH_CONSTS 8 + +enum arg_regfile +{ + ARG_SGPR, + ARG_VGPR, +}; + +enum arg_type +{ + ARG_FLOAT, + ARG_INT, + ARG_CONST_PTR, /* Pointer to i8 array */ + ARG_CONST_FLOAT_PTR, /* Pointer to f32 array */ + ARG_CONST_PTR_PTR, /* Pointer to pointer to i8 array */ + ARG_CONST_DESC_PTR, /* Pointer to v4i32 array */ + ARG_CONST_IMAGE_PTR, /* Pointer to v8i32 array */ +}; + +struct arg { + uint8_t arg_index; + bool used; +}; + +#define MAX_ARGS 128 + +struct shader_args { + /* Info on how to declare arguments */ + struct { + enum arg_type type; + enum arg_regfile file; + uint8_t offset; + uint8_t size; + bool skip; + } args[MAX_ARGS]; + + uint8_t arg_count; + uint8_t sgpr_count; + uint8_t num_sgprs_used; + uint8_t num_vgprs_used; + + struct arg base_vertex; + struct arg start_instance; + struct arg draw_id; + struct arg vertex_id; + struct arg instance_id; + struct arg tcs_patch_id; + struct arg tcs_rel_ids; + struct arg tes_patch_id; + struct arg gs_prim_id; + struct arg gs_invocation_id; + + /* PS */ + struct arg frag_pos[4]; + struct arg front_face; + struct arg ancillary; + struct arg sample_coverage; + struct arg prim_mask; + struct arg persp_sample; + struct arg persp_center; + struct arg persp_centroid; + struct arg pull_model; + struct arg linear_sample; + struct arg linear_center; + struct arg linear_centroid; + + /* CS */ + struct arg local_invocation_ids; + struct arg num_work_groups; + struct arg workgroup_ids[3]; + struct arg tg_size; + + /* Vulkan only */ + struct arg push_constants; + struct arg inline_push_consts[MAX_INLINE_PUSH_CONSTS]; + unsigned num_inline_push_consts; + unsigned base_inline_push_consts; + struct arg view_index; +}; + +void add_arg(struct shader_args *info, enum arg_regfile regfile, unsigned registers, + enum arg_type type, struct arg *arg); + +#endif diff --git a/src/libre-soc/vulkan/meson.build b/src/libre-soc/vulkan/meson.build index b8ba6d29c7c..e1dfe46af6a 100644 --- 
a/src/libre-soc/vulkan/meson.build +++ b/src/libre-soc/vulkan/meson.build @@ -78,6 +78,8 @@ liblibresoc_files = files( 'libresoc_private.h', 'vk_format.h', 'libresoc_llvm.c', + 'libresoc_llvm_build.c', + 'libresoc_shader_args.c', ) libresoc_deps = []