X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Famd%2Fllvm%2Fac_llvm_build.c;h=77d3f7e73fbe84cf49548ad95d247b14a8a04d0b;hp=f789ff5a368f650bee70a45235bb257183e70e71;hb=a79dad950b1f10ddeca2c907025a0f649b470cb9;hpb=77393cf39b7c4ae73c1c1731bddc9a0668740338

diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index f789ff5a368..77d3f7e73fb 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -65,8 +65,6 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
                      enum ac_float_mode float_mode, unsigned wave_size,
                      unsigned ballot_mask_bits)
 {
-   LLVMValueRef args[1];
-
    ctx->context = LLVMContextCreate();
 
    ctx->chip_class = chip_class;
@@ -91,6 +89,9 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
    ctx->f32 = LLVMFloatTypeInContext(ctx->context);
    ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
    ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
+   ctx->v4i16 = LLVMVectorType(ctx->i16, 4);
+   ctx->v2f16 = LLVMVectorType(ctx->f16, 2);
+   ctx->v4f16 = LLVMVectorType(ctx->f16, 4);
    ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
    ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
    ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
@@ -127,11 +128,6 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
    ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
                                                           "invariant.load", 14);
 
-   ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
-
-   args[0] = LLVMConstReal(ctx->f32, 2.5);
-   ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
-
    ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
                                                    "amdgpu.uniform", 14);
 
@@ -464,11 +460,10 @@ ac_build_optimization_barrier(struct ac_llvm_context *ctx,
 }
 
 LLVMValueRef
-ac_build_shader_clock(struct ac_llvm_context *ctx)
+ac_build_shader_clock(struct ac_llvm_context *ctx, nir_scope scope)
 {
-   const char *intr = LLVM_VERSION_MAJOR >= 9 && ctx->chip_class >= GFX8 ?
-                         "llvm.amdgcn.s.memrealtime" : "llvm.readcyclecounter";
-   LLVMValueRef tmp = ac_build_intrinsic(ctx, intr, ctx->i64, NULL, 0, 0);
+   const char *name = scope == NIR_SCOPE_DEVICE ? "llvm.amdgcn.s.memrealtime" : "llvm.amdgcn.s.memtime";
+   LLVMValueRef tmp = ac_build_intrinsic(ctx, name, ctx->i64, NULL, 0, 0);
 
    return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, "");
 }
@@ -707,20 +702,25 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
               LLVMValueRef num,
               LLVMValueRef den)
 {
-   /* If we do (num / den), LLVM >= 7.0 does:
-    *    return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
-    *
-    * If we do (num * (1 / den)), LLVM does:
-    *    return num * v_rcp_f32(den);
-    */
-   LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0);
-   LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
-   LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
+   unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
+   const char *name;
 
-   /* Use v_rcp_f32 instead of precise division. */
-   if (!LLVMIsConstant(ret))
-      LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
-   return ret;
+   /* For doubles, we need precise division to pass GLCTS. */
+   if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
+       type_size == 8)
+      return LLVMBuildFDiv(ctx->builder, num, den, "");
+
+   if (type_size == 2)
+      name = "llvm.amdgcn.rcp.f16";
+   else if (type_size == 4)
+      name = "llvm.amdgcn.rcp.f32";
+   else
+      name = "llvm.amdgcn.rcp.f64";
+
+   LLVMValueRef rcp = ac_build_intrinsic(ctx, name, LLVMTypeOf(den),
+                                         &den, 1, AC_FUNC_ATTR_READNONE);
+
+   return LLVMBuildFMul(ctx->builder, num, rcp, "");
 }
 
 /* See fast_idiv_by_const.h. */
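The fdiv rewrite above drops the fpmath-metadata trick (1/den tagged with 2.5 ULP) in favor of an explicit llvm.amdgcn.rcp call, selecting the overload by operand size. A minimal standalone sketch of that selection, keyed off the LLVM type kind rather than mesa's ac_get_type_size() ("rcp_intr_name" is a hypothetical helper, not mesa API):

#include <llvm-c/Core.h>
#include <stddef.h>

/* Map a scalar float type to the AMDGPU reciprocal intrinsic name
 * used by the new ac_build_fdiv() path. */
static const char *rcp_intr_name(LLVMTypeRef type)
{
   switch (LLVMGetTypeKind(type)) {
   case LLVMHalfTypeKind:   return "llvm.amdgcn.rcp.f16";
   case LLVMFloatTypeKind:  return "llvm.amdgcn.rcp.f32";
   case LLVMDoubleTypeKind: return "llvm.amdgcn.rcp.f64";
   default:                 return NULL; /* no hardware rcp overload */
   }
}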
@@ -1188,8 +1188,6 @@ ac_build_buffer_store_common(struct ac_llvm_context *ctx,
                              LLVMValueRef vindex,
                              LLVMValueRef voffset,
                              LLVMValueRef soffset,
-                             unsigned num_channels,
-                             LLVMTypeRef return_channel_type,
                              unsigned cache_policy,
                              bool use_format,
                              bool structurized)
@@ -1203,12 +1201,10 @@ ac_build_buffer_store_common(struct ac_llvm_context *ctx,
    args[idx++] = voffset ? voffset : ctx->i32_0;
    args[idx++] = soffset ? soffset : ctx->i32_0;
    args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
-   unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) &&
-                   num_channels == 3 ? 4 : num_channels;
    const char *indexing_kind = structurized ? "struct" : "raw";
    char name[256], type_name[8];
 
-   LLVMTypeRef type = func > 1 ? LLVMVectorType(return_channel_type, func) : return_channel_type;
-   ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
+   ac_build_type_name_for_intr(LLVMTypeOf(data), type_name, sizeof(type_name));
 
    if (use_format) {
       snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s",
@@ -1228,13 +1224,10 @@ ac_build_buffer_store_format(struct ac_llvm_context *ctx,
                              LLVMValueRef data,
                              LLVMValueRef vindex,
                              LLVMValueRef voffset,
-                             unsigned num_channels,
                              unsigned cache_policy)
 {
-   ac_build_buffer_store_common(ctx, rsrc, data, vindex,
-                                voffset, NULL, num_channels,
-                                ctx->f32, cache_policy,
-                                true, true);
+   ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL,
+                                cache_policy, true, true);
 }
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
@@ -1283,7 +1276,6 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
       ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata),
                                    ctx->i32_0, voffset, offset,
-                                   num_channels, ctx->f32,
                                    cache_policy, false, false);
       return;
    }
@@ -1327,6 +1319,11 @@ ac_build_buffer_load_common(struct ac_llvm_context *ctx,
    const char *indexing_kind = structurized ? "struct" : "raw";
    char name[256], type_name[8];
 
+   /* D16 is only supported on gfx8+ */
+   assert(!use_format ||
+          (channel_type != ctx->f16 && channel_type != ctx->i16) ||
+          ctx->chip_class >= GFX8);
+
    LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
    ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
 
@@ -1402,10 +1399,12 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
                                          LLVMValueRef voffset,
                                          unsigned num_channels,
                                          unsigned cache_policy,
-                                         bool can_speculate)
+                                         bool can_speculate,
+                                         bool d16)
 {
    return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset,
-                                      ctx->i32_0, num_channels, ctx->f32,
+                                      ctx->i32_0, num_channels,
+                                      d16 ? ctx->f16 : ctx->f32,
                                       cache_policy, can_speculate,
                                       true, true);
 }
@@ -1657,7 +1656,7 @@ ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
    }
 
    int log_recombine = 0;
-   if (ctx->chip_class == GFX6 && !known_aligned) {
+   if ((ctx->chip_class == GFX6 || ctx->chip_class == GFX10) && !known_aligned) {
       /* Avoid alignment restrictions by loading one byte at a time. */
      load_num_channels <<= load_log_size;
      log_recombine = load_log_size;
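With num_channels and return_channel_type gone from ac_build_buffer_store_common(), the overload suffix now comes straight from LLVMTypeOf(data). A rough, simplified stand-in for what ac_build_type_name_for_intr() produces ("type_overload_suffix" is illustrative only and covers just the element types used in these hunks):

#include <llvm-c/Core.h>
#include <stdio.h>

/* Derive an intrinsic overload suffix such as "f32", "i16" or "v4f16"
 * from an LLVM value type. The real mesa helper handles more cases. */
static void type_overload_suffix(LLVMTypeRef type, char *buf, size_t size)
{
   unsigned num = 1;

   if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
      num = LLVMGetVectorSize(type);
      type = LLVMGetElementType(type);
   }

   const char *elem =
      LLVMGetTypeKind(type) == LLVMHalfTypeKind ? "f16" :
      LLVMGetTypeKind(type) == LLVMFloatTypeKind ? "f32" :
      LLVMGetTypeKind(type) == LLVMIntegerTypeKind &&
      LLVMGetIntTypeWidth(type) == 16 ? "i16" : "i32";

   if (num > 1)
      snprintf(buf, size, "v%u%s", num, elem);
   else
      snprintf(buf, size, "%s", elem);
}

For instance, a <4 x half> data value would yield "v4f16", so a format store of it ends up named llvm.amdgcn.raw.buffer.store.format.v4f16.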
@@ -1941,8 +1940,7 @@ ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
    if (LLVM_VERSION_MAJOR >= 9) {
       /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
       ac_build_buffer_store_common(ctx, rsrc, vdata, NULL,
-                                   voffset, soffset, 1,
-                                   ctx->i16, cache_policy,
+                                   voffset, soffset, cache_policy,
                                    false, false);
    } else {
       unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
@@ -1968,8 +1966,7 @@ ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
    if (LLVM_VERSION_MAJOR >= 9) {
       /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
       ac_build_buffer_store_common(ctx, rsrc, vdata, NULL,
-                                   voffset, soffset, 1,
-                                   ctx->i8, cache_policy,
+                                   voffset, soffset, cache_policy,
                                    false, false);
    } else {
       unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
@@ -2064,6 +2061,8 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 
    if (result_type == ctx->f16)
       val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
+   else if (result_type == ctx->v2f16)
+      val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, "");
 
    for (unsigned i = 0; i < 4; ++i) {
       tl_lanes[i] = i & mask;
@@ -2197,8 +2196,10 @@ ac_build_umsb(struct ac_llvm_context *ctx,
 LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
                            LLVMValueRef b)
 {
-   char name[64];
-   snprintf(name, sizeof(name), "llvm.minnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a)));
+   char name[64], type[64];
+
+   ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type));
+   snprintf(name, sizeof(name), "llvm.minnum.%s", type);
    LLVMValueRef args[2] = {a, b};
    return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2,
                              AC_FUNC_ATTR_READNONE);
@@ -2207,8 +2208,10 @@ LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
 LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
                            LLVMValueRef b)
 {
-   char name[64];
-   snprintf(name, sizeof(name), "llvm.maxnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a)));
+   char name[64], type[64];
+
+   ac_build_type_name_for_intr(LLVMTypeOf(a), type, sizeof(type));
+   snprintf(name, sizeof(name), "llvm.maxnum.%s", type);
    LLVMValueRef args[2] = {a, b};
    return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2,
                              AC_FUNC_ATTR_READNONE);
@@ -2257,13 +2260,10 @@ void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
    args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
 
    if (a->compr) {
-      LLVMTypeRef i16 = LLVMInt16TypeInContext(ctx->context);
-      LLVMTypeRef v2i16 = LLVMVectorType(i16, 2);
-
       args[2] = LLVMBuildBitCast(ctx->builder, a->out[0],
-                                 v2i16, "");
+                                 ctx->v2i16, "");
       args[3] = LLVMBuildBitCast(ctx->builder, a->out[1],
-                                 v2i16, "");
+                                 ctx->v2i16, "");
       args[4] = LLVMConstInt(ctx->i1, a->done, 0);
       args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
 
@@ -2380,6 +2380,14 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
           (a->lod ? 1 : 0) +
           (a->level_zero ? 1 : 0) +
           (a->derivs[0] ? 1 : 0) <= 1);
+   assert((a->min_lod ? 1 : 0) +
+          (a->lod ? 1 : 0) +
+          (a->level_zero ? 1 : 0) <= 1);
+   assert(!a->d16 || (ctx->chip_class >= GFX8 &&
+                      a->opcode != ac_image_atomic &&
+                      a->opcode != ac_image_atomic_cmpswap &&
+                      a->opcode != ac_image_get_lod &&
+                      a->opcode != ac_image_get_resinfo));
 
    if (a->opcode == ac_image_get_lod) {
       switch (dim) {
@@ -2435,6 +2443,9 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
          args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
       if (a->lod)
         args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
+      if (a->min_lod)
+         args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, "");
+
       overload[num_overloads++] = sample ? ".f32" : ".i32";
 
       args[num_args++] = a->resource;
@@ -2488,7 +2499,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
    char intr_name[96];
    snprintf(intr_name, sizeof(intr_name),
             "llvm.amdgcn.image.%s%s" /* base name */
-            "%s%s%s" /* sample/gather modifiers */
+            "%s%s%s%s" /* sample/gather modifiers */
             ".%s.%s%s%s%s", /* dimension and type overloads */
             name, atomic_subop, a->compare ? ".c" : "",
@@ -2496,9 +2507,10 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
             lod_suffix ? ".l" :
             a->derivs[0] ? ".d" :
             a->level_zero ? ".lz" : "",
+            a->min_lod ? ".cl" : "",
             a->offset ? ".o" : "",
             dimname,
-            atomic ? "i32" : "v4f32",
+            atomic ? "i32" : (a->d16 ? "v4f16" : "v4f32"),
             overload[0], overload[1], overload[2]);
 
    LLVMTypeRef retty;
@@ -2507,15 +2519,14 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
    else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
       retty = ctx->voidt;
    else
-      retty = ctx->v4f32;
+      retty = a->d16 ? ctx->v4f16 : ctx->v4f32;
 
    LLVMValueRef result = ac_build_intrinsic(ctx, intr_name, retty, args,
                                             num_args, a->attributes);
-   if (!sample && retty == ctx->v4f32) {
-      result = LLVMBuildBitCast(ctx->builder, result,
-                                ctx->v4i32, "");
-   }
+   if (!sample && !atomic && retty != ctx->voidt)
+      result = ac_to_integer(ctx, result);
+
    return result;
 }
@@ -2541,10 +2552,7 @@ LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
                                     LLVMValueRef args[2])
 {
-   LLVMTypeRef v2f16 =
-      LLVMVectorType(LLVMHalfTypeInContext(ctx->context), 2);
-
-   return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", v2f16,
+   return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", ctx->v2f16,
                              args, 2, AC_FUNC_ATTR_READNONE);
 }
 
@@ -2719,33 +2727,6 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
                       ctx->voidt, args, 1, 0);
 }
 
-LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                            LLVMValueRef src1, LLVMValueRef src2,
-                            unsigned bitsize)
-{
-   LLVMTypeRef type;
-   char *intr;
-
-   if (bitsize == 16) {
-      intr = "llvm.amdgcn.fmed3.f16";
-      type = ctx->f16;
-   } else if (bitsize == 32) {
-      intr = "llvm.amdgcn.fmed3.f32";
-      type = ctx->f32;
-   } else {
-      intr = "llvm.amdgcn.fmed3.f64";
-      type = ctx->f64;
-   }
-
-   LLVMValueRef params[] = {
-      src0,
-      src1,
-      src2,
-   };
-   return ac_build_intrinsic(ctx, intr, type, params, 3,
-                             AC_FUNC_ATTR_READNONE);
-}
-
 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
                             unsigned bitsize)
 {
@@ -3067,6 +3048,7 @@ void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
                             LLVMValueRef main_fn,
                             uint8_t *vs_output_param_offset,
                             uint32_t num_outputs,
+                            uint32_t skip_output_mask,
                             uint8_t *num_param_exports)
 {
    LLVMBasicBlockRef bb;
@@ -3133,12 +3115,13 @@ void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
       }
 
       /* Eliminate constant and duplicated PARAM exports. */
-      if (ac_eliminate_const_output(vs_output_param_offset,
-                                    num_outputs, &exp) ||
-          ac_eliminate_duplicated_output(ctx,
-                                         vs_output_param_offset,
-                                         num_outputs, &exports,
-                                         &exp)) {
+      if (!((1u << target) & skip_output_mask) &&
+          (ac_eliminate_const_output(vs_output_param_offset,
+                                     num_outputs, &exp) ||
+           ac_eliminate_duplicated_output(ctx,
+                                          vs_output_param_offset,
+                                          num_outputs, &exports,
+                                          &exp))) {
          removed_any = true;
       } else {
          exports.exp[exports.num++] = exp;
@@ -3590,12 +3573,14 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
 }
 
 static LLVMValueRef
-_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
+                   LLVMValueRef lane, bool with_opt_barrier)
 {
    LLVMTypeRef type = LLVMTypeOf(src);
    LLVMValueRef result;
 
-   ac_build_optimization_barrier(ctx, &src);
+   if (with_opt_barrier)
+      ac_build_optimization_barrier(ctx, &src);
 
    src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
    if (lane)
@@ -3611,15 +3596,10 @@ _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef l
    return LLVMBuildTrunc(ctx->builder, result, type, "");
 }
 
-/**
- * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
- * @param ctx
- * @param src
- * @param lane - id of the lane or NULL for the first active lane
- * @return value of the lane
- */
-LLVMValueRef
-ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+static LLVMValueRef
+ac_build_readlane_common(struct ac_llvm_context *ctx,
+                         LLVMValueRef src, LLVMValueRef lane,
+                         bool with_opt_barrier)
 {
    LLVMTypeRef src_type = LLVMTypeOf(src);
    src = ac_to_integer(ctx, src);
@@ -3633,14 +3613,19 @@ ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef la
          LLVMBuildBitCast(ctx->builder, src, vec_type, "");
       ret = LLVMGetUndef(vec_type);
       for (unsigned i = 0; i < bits / 32; i++) {
+         LLVMValueRef ret_comp;
+
          src = LLVMBuildExtractElement(ctx->builder, src_vector,
                                        LLVMConstInt(ctx->i32, i, 0), "");
-         LLVMValueRef ret_comp = _ac_build_readlane(ctx, src, lane);
+
+         ret_comp = _ac_build_readlane(ctx, src, lane,
+                                       with_opt_barrier);
+
          ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp,
                                       LLVMConstInt(ctx->i32, i, 0), "");
       }
    } else {
-      ret = _ac_build_readlane(ctx, src, lane);
+      ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
    }
 
    if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
@@ -3648,6 +3633,30 @@ ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef la
    return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
 }
 
+/**
+ * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
+ *
+ * The optimization barrier is not needed if the value is the same in all lanes
+ * or if this is called in the outermost block.
+ *
+ * @param ctx
+ * @param src
+ * @param lane - id of the lane or NULL for the first active lane
+ * @return value of the lane
+ */
+LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
+                                              LLVMValueRef src, LLVMValueRef lane)
+{
+   return ac_build_readlane_common(ctx, src, lane, false);
+}
+
+
+LLVMValueRef
+ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+{
+   return ac_build_readlane_common(ctx, src, lane, true);
+}
+
 LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src,
                                 LLVMValueRef value, LLVMValueRef lane)
 {
@@ -3664,9 +3673,7 @@ ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
                                 (LLVMValueRef []) { mask, ctx->i32_0 },
                                 2, AC_FUNC_ATTR_READNONE);
    }
-   LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask,
-                                            LLVMVectorType(ctx->i32, 2),
-                                            "");
+   LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, ctx->v2i32, "");
    LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec,
                                                   ctx->i32_0, "");
    LLVMValueRef mask_hi = LLVMBuildExtractElement(ctx->builder, mask_vec,
@@ -4663,6 +4670,24 @@ ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
    return LLVMBuildSExt(ctx->builder, result, ctx->i32, "");
 }
 
+LLVMValueRef
+ac_build_is_helper_invocation(struct ac_llvm_context *ctx)
+{
+   if (!ctx->postponed_kill)
+      return ac_build_load_helper_invocation(ctx);
+
+   /* !(exact && postponed) */
+   LLVMValueRef exact = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live",
+                                           ctx->i1, NULL, 0,
+                                           AC_FUNC_ATTR_READNONE);
+
+   LLVMValueRef postponed = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, "");
+   LLVMValueRef result = LLVMBuildAnd(ctx->builder, exact, postponed, "");
+
+   return LLVMBuildSelect(ctx->builder, result, ctx->i32_0,
+                          LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), "");
+}
+
 LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
                            LLVMValueRef *args, unsigned num_args)
 {
@@ -4750,6 +4775,18 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav
 {
    LLVMBuilderRef builder = ctx->builder;
    LLVMValueRef tmp;
+   bool export_dummy_prim = false;
+
+   /* HW workaround for a GPU hang with 100% culling.
+    * We always have to export at least 1 primitive.
+    * Export a degenerate triangle using vertex 0 for all 3 vertices.
+    */
+   if (prim_cnt == ctx->i32_0 && ctx->chip_class == GFX10) {
+      assert(vtx_cnt == ctx->i32_0);
+      prim_cnt = ctx->i32_1;
+      vtx_cnt = ctx->i32_1;
+      export_dummy_prim = true;
+   }
 
    ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020);
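For reference, the select at the end of ac_build_is_helper_invocation() above can be reproduced with plain LLVM-C. A minimal sketch, assuming the two i1 inputs are already available ("build_helper_mask" is a hypothetical name, not mesa API):

#include <llvm-c/Core.h>

/* helper = !(live && postponed): yields i32 0 for a real, still-live
 * invocation and ~0 (true) for a helper invocation. */
static LLVMValueRef build_helper_mask(LLVMBuilderRef builder,
                                      LLVMContextRef context,
                                      LLVMValueRef live,      /* i1 */
                                      LLVMValueRef postponed) /* i1 */
{
   LLVMTypeRef i32 = LLVMInt32TypeInContext(context);
   LLVMValueRef real = LLVMBuildAnd(builder, live, postponed, "");

   return LLVMBuildSelect(builder, real,
                          LLVMConstInt(i32, 0, false),
                          LLVMConstInt(i32, 0xFFFFFFFF, false), "");
}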
@@ -4757,6 +4794,24 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav
    tmp = LLVMBuildOr(builder, tmp, vtx_cnt, "");
    ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp);
 
+   if (export_dummy_prim) {
+      struct ac_ngg_prim prim = {};
+      /* The vertex indices are 0,0,0. */
+      prim.passthrough = ctx->i32_0;
+
+      struct ac_export_args pos = {};
+      pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = ctx->f32_0;
+      pos.target = V_008DFC_SQ_EXP_POS;
+      pos.enabled_channels = 0xf;
+      pos.done = true;
+
+      ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx),
+                                       ctx->i32_0, ""), 5021);
+      ac_build_export_prim(ctx, &prim);
+      ac_build_export(ctx, &pos);
+      ac_build_endif(ctx, 5021);
+   }
+
    ac_build_endif(ctx, 5020);
 }
 
@@ -4885,10 +4940,20 @@ ac_build_main(const struct ac_shader_args *args,
       if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
          ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
          ac_add_attr_dereferenceable(P, UINT64_MAX);
+         ac_add_attr_alignment(P, 32);
       }
    }
 
    ctx->main_function = main_function;
+
+   if (LLVM_VERSION_MAJOR >= 11) {
+      /* Enable denormals for FP16 and FP64: */
+      LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math",
+                                         "ieee,ieee");
+      /* Disable denormals for FP32: */
+      LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32",
+                                         "preserve-sign,preserve-sign");
+   }
+
    return main_function;
 }
 
@@ -4957,3 +5022,38 @@ LLVMValueRef ac_prefix_bitcount_2x64(struct ac_llvm_context *ctx,
    return LLVMBuildAdd(builder, prefix_bcnt[0], prefix_bcnt[1], "");
 #endif
 }
+
+/**
+ * Convert triangle strip indices to triangle indices. This is used to decompose
+ * triangle strips into triangles.
+ */
+void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx,
+                                                 LLVMValueRef is_odd,
+                                                 LLVMValueRef flatshade_first,
+                                                 LLVMValueRef index[3])
+{
+   LLVMBuilderRef builder = ctx->builder;
+   LLVMValueRef out[3];
+
+   /* We need to change the vertex order for odd triangles to get correct
+    * front/back facing by swapping 2 vertex indices, but we also have to
+    * keep the provoking vertex in the same place.
+    *
+    * If the first vertex is provoking, swap index 1 and 2.
+    * If the last vertex is provoking, swap index 0 and 1.
+    */
+   out[0] = LLVMBuildSelect(builder, flatshade_first,
+                            index[0],
+                            LLVMBuildSelect(builder, is_odd,
+                                            index[1], index[0], ""), "");
+   out[1] = LLVMBuildSelect(builder, flatshade_first,
+                            LLVMBuildSelect(builder, is_odd,
+                                            index[2], index[1], ""),
+                            LLVMBuildSelect(builder, is_odd,
+                                            index[0], index[1], ""), "");
+   out[2] = LLVMBuildSelect(builder, flatshade_first,
+                            LLVMBuildSelect(builder, is_odd,
+                                            index[1], index[2], ""),
+                            index[2], "");
+   memcpy(index, out, sizeof(out));
+}
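The nested selects in ac_build_triangle_strip_indices_to_triangle() are easier to audit against a plain C reference. A CPU-side sketch of the same rules (illustrative only, not part of mesa):

#include <stdbool.h>
#include <string.h>

/* Odd triangles swap two indices to fix the winding order while the
 * provoking vertex (first or last) keeps its position. */
static void strip_to_tri_indices(bool is_odd, bool flatshade_first,
                                 unsigned index[3])
{
   unsigned out[3];

   if (flatshade_first) {
      /* Provoking vertex is index 0; odd triangles swap 1 and 2. */
      out[0] = index[0];
      out[1] = is_odd ? index[2] : index[1];
      out[2] = is_odd ? index[1] : index[2];
   } else {
      /* Provoking vertex is index 2; odd triangles swap 0 and 1. */
      out[0] = is_odd ? index[1] : index[0];
      out[1] = is_odd ? index[0] : index[1];
      out[2] = index[2];
   }
   memcpy(index, out, sizeof(out));
}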