X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_llvm_build.c;h=c74a47a79984737178f29dcc21c9b3d84b84835d;hb=bedfa06eaf242a87c1b590f1504b8082d3d8ac1f;hp=8a329515b57b0877efea2e9c1cc8cf3a8cfbd836;hpb=94736d31c364635a76a11e0bd4f046a42d2221d5;p=mesa.git

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 8a329515b57..c74a47a7998 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -47,11 +47,12 @@
  */
 void
 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
-		     enum chip_class chip_class)
+		     enum chip_class chip_class, enum radeon_family family)
 {
 	LLVMValueRef args[1];
 
 	ctx->chip_class = chip_class;
+	ctx->family = family;
 
 	ctx->context = context;
 	ctx->module = NULL;
@@ -66,7 +67,10 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
 	ctx->f16 = LLVMHalfTypeInContext(ctx->context);
 	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
 	ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
+	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
+	ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
 	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+	ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
 	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
@@ -75,6 +79,9 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
 	ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
 	ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
 
+	ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
+	ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
+
 	ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
 						     "range", 5);
@@ -252,6 +259,20 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
 	}
 }
 
+/**
+ * Helper function that builds an LLVM IR PHI node and immediately adds
+ * incoming edges.
+ */
+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+	     unsigned count_incoming, LLVMValueRef *values,
+	     LLVMBasicBlockRef *blocks)
+{
+	LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+	LLVMAddIncoming(phi, values, blocks, count_incoming);
+	return phi;
+}
+
 /* Prevent optimizations (at least of memory accesses) across the current
  * point in the program by emitting empty inline assembly that is marked as
  * having side effects.
@@ -350,6 +371,28 @@ ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
 	return LLVMBuildOr(ctx->builder, all, none, "");
 }
 
+LLVMValueRef
+ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
+			       unsigned value_count, unsigned component)
+{
+	LLVMValueRef vec = NULL;
+
+	if (value_count == 1) {
+		return values[component];
+	} else if (!value_count)
+		unreachable("value_count is 0");
+
+	for (unsigned i = component; i < value_count + component; i++) {
+		LLVMValueRef value = values[i];
+
+		if (i == component)
+			vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count));
+		LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
+		vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
+	}
+	return vec;
+}
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
 				LLVMValueRef *values,
@@ -397,6 +440,7 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
 {
 	LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
 
+	/* Use v_rcp_f32 instead of precise division. */
 	if (!LLVMIsConstant(ret))
 		LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
 	return ret;
 }
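The new ac_build_phi helper above collapses the usual LLVMBuildPhi-then-LLVMAddIncoming pair into one call. What follows is a minimal caller-side sketch, not part of this patch, of merging an if/else diamond; the helper and its block names are hypothetical:

static LLVMValueRef
emit_select_via_phi(struct ac_llvm_context *ctx, LLVMValueRef cond,
		    LLVMValueRef a, LLVMValueRef b)
{
	LLVMValueRef fn =
		LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder));
	LLVMBasicBlockRef then_bb =
		LLVMAppendBasicBlockInContext(ctx->context, fn, "then");
	LLVMBasicBlockRef else_bb =
		LLVMAppendBasicBlockInContext(ctx->context, fn, "else");
	LLVMBasicBlockRef merge_bb =
		LLVMAppendBasicBlockInContext(ctx->context, fn, "merge");

	/* Both arms only jump to the merge block; they exist to give the
	 * phi its two incoming edges. */
	LLVMBuildCondBr(ctx->builder, cond, then_bb, else_bb);
	LLVMPositionBuilderAtEnd(ctx->builder, then_bb);
	LLVMBuildBr(ctx->builder, merge_bb);
	LLVMPositionBuilderAtEnd(ctx->builder, else_bb);
	LLVMBuildBr(ctx->builder, merge_bb);

	LLVMPositionBuilderAtEnd(ctx->builder, merge_bb);
	LLVMValueRef incoming[2] = { a, b };
	LLVMBasicBlockRef preds[2] = { then_bb, else_bb };
	return ac_build_phi(ctx, LLVMTypeOf(a), 2, incoming, preds);
}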
@@ -438,12 +482,13 @@ build_cube_intrinsic(struct ac_llvm_context *ctx,
  * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
  * the selcoords major axis.
  */
-static void build_cube_select(LLVMBuilderRef builder,
+static void build_cube_select(struct ac_llvm_context *ctx,
 			      const struct cube_selection_coords *selcoords,
 			      const LLVMValueRef *coords,
 			      LLVMValueRef *out_st,
 			      LLVMValueRef *out_ma)
 {
+	LLVMBuilderRef builder = ctx->builder;
 	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
 	LLVMValueRef is_ma_positive;
 	LLVMValueRef sgn_ma;
@@ -465,24 +510,24 @@ static void build_cube_select(LLVMBuilderRef builder,
 	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
 
 	/* Select sc */
-	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
+	tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
 	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
-		LLVMBuildSelect(builder, is_ma_x, sgn_ma,
+		LLVMBuildSelect(builder, is_ma_z, sgn_ma,
 			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
 	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
 
 	/* Select tc */
 	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
-	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
+	sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
 		LLVMConstReal(f32, -1.0), "");
 	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
 
 	/* Select ma */
 	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
 		LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
-	sgn = LLVMBuildSelect(builder, is_ma_positive,
-		LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
-	*out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
+	tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
+				 ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
+	*out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
 }
 
 void
@@ -570,7 +615,7 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 			 * seems awfully quiet about how textureGrad for cube
 			 * maps should be handled.
 			 */
-			build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
+			build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
 					  deriv_st, &deriv_ma);
 
 			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
@@ -702,32 +747,40 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
  * \param base_ptr Where the array starts.
  * \param index The element index into the array.
  * \param uniform Whether the base_ptr and index can be assumed to be
- *                dynamically uniform
+ *                dynamically uniform (i.e. load to an SGPR)
+ * \param invariant Whether the load is invariant (no other opcodes affect it)
  */
-LLVMValueRef
-ac_build_indexed_load(struct ac_llvm_context *ctx,
-		      LLVMValueRef base_ptr, LLVMValueRef index,
-		      bool uniform)
+static LLVMValueRef
+ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+		     LLVMValueRef index, bool uniform, bool invariant)
 {
-	LLVMValueRef pointer;
+	LLVMValueRef pointer, result;
 
 	pointer = ac_build_gep0(ctx, base_ptr, index);
 	if (uniform)
 		LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
-	return LLVMBuildLoad(ctx->builder, pointer, "");
+	result = LLVMBuildLoad(ctx->builder, pointer, "");
+	if (invariant)
+		LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+	return result;
 }
 
-/**
- * Do a load from &base_ptr[index], but also add a flag that it's loading
- * a constant from a dynamically uniform index.
- */
-LLVMValueRef
-ac_build_indexed_load_const(struct ac_llvm_context *ctx,
-			    LLVMValueRef base_ptr, LLVMValueRef index)
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+			   LLVMValueRef index)
 {
-	LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
-	LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
-	return result;
+	return ac_build_load_custom(ctx, base_ptr, index, false, false);
+}
+
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+				     LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	return ac_build_load_custom(ctx, base_ptr, index, false, true);
+}
+
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+				   LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	return ac_build_load_custom(ctx, base_ptr, index, true, true);
 }
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
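The three wrappers above replace ac_build_indexed_load/ac_build_indexed_load_const with names that state the access pattern directly. A sketch of how call sites map over, with hypothetical descriptor-list and scratch-array values (none taken from this patch):

	/* Descriptor fetch: dynamically uniform and never written, so it
	 * may be loaded into an SGPR and hoisted freely. */
	LLVMValueRef desc = ac_build_load_to_sgpr(ctx, desc_list, desc_index);

	/* Read-only data reached through a possibly divergent index. */
	LLVMValueRef cval = ac_build_load_invariant(ctx, const_array, elem);

	/* Memory that may be rewritten between loads. */
	LLVMValueRef tmp = ac_build_load(ctx, scratch_array, elem);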
@@ -745,10 +798,13 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 			    bool glc,
 			    bool slc,
 			    bool writeonly_memory,
-			    bool has_add_tid)
+			    bool swizzle_enable_hint)
 {
-	/* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
-	if (!has_add_tid) {
+	/* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
+	 * (voffset is swizzled, but soffset isn't swizzled).
+	 * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
+	 */
+	if (!swizzle_enable_hint) {
 		/* Split 3 channel stores, because LLVM doesn't support 3-channel
 		 * intrinsics. */
 		if (num_channels == 3) {
@@ -762,11 +818,11 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 
 			ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
 						    soffset, inst_offset, glc, slc,
-						    writeonly_memory, has_add_tid);
+						    writeonly_memory, swizzle_enable_hint);
 			ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
 						    soffset, inst_offset + 8,
 						    glc, slc,
-						    writeonly_memory, has_add_tid);
+						    writeonly_memory, swizzle_enable_hint);
 			return;
 		}
@@ -920,8 +976,8 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
 		LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
 		vindex,
 		voffset,
-		LLVMConstInt(ctx->i1, 0, 0), /* glc */
-		LLVMConstInt(ctx->i1, 0, 0), /* slc */
+		ctx->i1false, /* glc */
+		ctx->i1false, /* slc */
 	};
 
 	return ac_build_intrinsic(ctx,
@@ -1033,7 +1089,7 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 					   AC_FUNC_ATTR_READNONE |
 					   AC_FUNC_ATTR_CONVERGENT);
 	} else {
-		uint32_t masks[2];
+		uint32_t masks[2] = {};
 
 		switch (mask) {
 		case AC_TID_MASK_TOP_LEFT:
@@ -1052,6 +1108,8 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 			masks[0] = 0x80a0;
 			masks[1] = 0x80f5;
 			break;
+		default:
+			assert(0);
 		}
 
 		args[0] = val;
@@ -1122,7 +1180,7 @@ ac_build_umsb(struct ac_llvm_context *ctx,
 {
 	LLVMValueRef args[2] = {
 		arg,
-		LLVMConstInt(ctx->i1, 1, 0),
+		ctx->i1true,
 	};
 	LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.ctlz.i32",
 					      dst_type, args, ARRAY_SIZE(args),
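At call sites, the renamed swizzle_enable_hint parameter from the store path above reads as follows; a sketch with placeholder resource and offset values, none of them from this patch:

	/* A single-dword store through a swizzled (e.g. tess-factor style)
	 * buffer: keep the hint enabled so soffset is not folded into
	 * voffset. */
	ac_build_buffer_store_dword(ctx, rsrc, value, 1,
				    voffset, soffset, /* inst_offset */ 0,
				    /* glc */ false, /* slc */ false,
				    /* writeonly_memory */ false,
				    /* swizzle_enable_hint */ true);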
@@ -1229,7 +1287,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 	LLVMTypeRef dst_type;
 	LLVMValueRef args[11];
 	unsigned num_args = 0;
-	const char *name;
+	const char *name = NULL;
 	char intr_name[128], type[64];
 
 	if (HAVE_LLVM >= 0x0400) {
@@ -1248,9 +1306,9 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 		args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
 		if (sample)
 			args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
-		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* glc */
-		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* slc */
-		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* lwe */
+		args[num_args++] = ctx->i1false; /* glc */
+		args[num_args++] = ctx->i1false; /* slc */
+		args[num_args++] = ctx->i1false; /* lwe */
 		args[num_args++] = LLVMConstInt(ctx->i1, a->da, 0);
 
 		switch (a->opcode) {
@@ -1377,20 +1435,26 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
 				   AC_FUNC_ATTR_LEGACY);
 }
 
-/**
- * KILL, AKA discard in GLSL.
- *
- * \param value  kill if value < 0.0 or value == NULL.
- */
-void ac_build_kill(struct ac_llvm_context *ctx, LLVMValueRef value)
+LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
 {
-	if (value) {
-		ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
-				   &value, 1, AC_FUNC_ATTR_LEGACY);
-	} else {
-		ac_build_intrinsic(ctx, "llvm.AMDGPU.kilp", ctx->voidt,
-				   NULL, 0, AC_FUNC_ATTR_LEGACY);
+	assert(HAVE_LLVM >= 0x0600);
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
+				  &i1, 1, AC_FUNC_ATTR_READNONE);
+}
+
+void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
+{
+	if (HAVE_LLVM >= 0x0600) {
+		ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
+				   &i1, 1, 0);
+		return;
 	}
+
+	LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1,
+					     LLVMConstReal(ctx->f32, 1),
+					     LLVMConstReal(ctx->f32, -1), "");
+	ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
+			   &value, 1, AC_FUNC_ATTR_LEGACY);
 }
 
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
@@ -1419,6 +1483,15 @@ LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
 				  AC_FUNC_ATTR_LEGACY);
 }
 
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
+{
+	LLVMValueRef args[1] = {
+		LLVMConstInt(ctx->i32, simm16, false),
+	};
+
+	ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
+			   ctx->voidt, args, 1, 0);
+}
+
 void ac_get_image_intr_name(const char *base_name,
 			    LLVMTypeRef data_type,
 			    LLVMTypeRef coords_type,
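Together, ac_build_wqm_vote and ac_build_kill_if_false express a GLSL discard. A sketch of a hypothetical alpha-test lowering follows; the comparison operands are illustrative, and the vote wrap assumes LLVM >= 6.0 as asserted above:

	/* Kill only quads in which every lane fails the test, so helper
	 * lanes keep producing valid derivatives for the survivors. */
	LLVMValueRef accept = LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
					    alpha, alpha_ref, "");
	if (HAVE_LLVM >= 0x0600)
		accept = ac_build_wqm_vote(ctx, accept);
	ac_build_kill_if_false(ctx, accept);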
+ */ + LLVMConstInt(ctx->i1, 1, false), + }; + + LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, + params, 2, + AC_FUNC_ATTR_READNONE); + + /* TODO: We need an intrinsic to skip this conditional. */ + /* Check for zero: */ + return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, + LLVMIntEQ, src0, + ctx->i32_0, ""), + LLVMConstInt(ctx->i32, -1, 0), lsb, ""); +}