From a0ce09b4b2a3063e49a02de3d12096cf462d10a3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Tue, 10 Jan 2017 15:35:27 +0100 Subject: [PATCH] amd/common: unify cube map coordinate handling between radeonsi and radv MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Code is taken from a combination of radv (for the more basic functions, to avoid gallivm dependencies) and radeonsi (for the new and improved derivative calculations). v2: add 0.5 offset to tex coords only after derivative calculation v3: - really only touch the first three coordinates - rebase on the removal of the 1.5 --> 0.5 offset change Reviewed-by: Bas Nieuwenhuizen (v2) Reviewed-by: Marek Olšák --- src/amd/common/ac_llvm_util.c | 364 ++++++++++++++++++ src/amd/common/ac_llvm_util.h | 57 +++ src/amd/common/ac_nir_to_llvm.c | 204 +--------- src/gallium/drivers/radeonsi/si_shader.c | 6 +- .../drivers/radeonsi/si_shader_internal.h | 2 + .../drivers/radeonsi/si_shader_tgsi_setup.c | 4 + 6 files changed, 440 insertions(+), 197 deletions(-) diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index a8408dd79e6..770e3bd13c1 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -32,6 +32,9 @@ #include #include +#include "util/bitscan.h" +#include "util/macros.h" + static void ac_init_llvm_target() { #if HAVE_LLVM < 0x0307 @@ -140,3 +143,364 @@ LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family) return tm; } + +/* Initialize module-independent parts of the context. + * + * The caller is responsible for initializing ctx::module and ctx::builder. 
+ */ +void +ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context) +{ + LLVMValueRef args[1]; + + ctx->context = context; + ctx->module = NULL; + ctx->builder = NULL; + + ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); + ctx->f32 = LLVMFloatTypeInContext(ctx->context); + + ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6); + + args[0] = LLVMConstReal(ctx->f32, 2.5); + ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1); +} + +#if HAVE_LLVM < 0x0400 +static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr) +{ + switch (attr) { + case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute; + case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute; + case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute; + case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute; + case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute; + case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute; + case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute; + default: + fprintf(stderr, "Unhandled function attribute: %x\n", attr); + return 0; + } +} + +#else + +static const char *attr_to_str(enum ac_func_attr attr) +{ + switch (attr) { + case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; + case AC_FUNC_ATTR_BYVAL: return "byval"; + case AC_FUNC_ATTR_INREG: return "inreg"; + case AC_FUNC_ATTR_NOALIAS: return "noalias"; + case AC_FUNC_ATTR_NOUNWIND: return "nounwind"; + case AC_FUNC_ATTR_READNONE: return "readnone"; + case AC_FUNC_ATTR_READONLY: return "readonly"; + default: + fprintf(stderr, "Unhandled function attribute: %x\n", attr); + return 0; + } +} + +#endif + +void +ac_add_function_attr(LLVMValueRef function, + int attr_idx, + enum ac_func_attr attr) +{ + +#if HAVE_LLVM < 0x0400 + LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr); + if (attr_idx == -1) { + LLVMAddFunctionAttr(function, llvm_attr); + } else { + LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); + } +#else + LLVMContextRef 
context = LLVMGetModuleContext(LLVMGetGlobalParent(function)); + const char *attr_name = attr_to_str(attr); + unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, + strlen(attr_name)); + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0); + LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); +#endif +} + +LLVMValueRef +ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, + LLVMTypeRef return_type, LLVMValueRef *params, + unsigned param_count, unsigned attrib_mask) +{ + LLVMValueRef function; + + function = LLVMGetNamedFunction(ctx->module, name); + if (!function) { + LLVMTypeRef param_types[32], function_type; + unsigned i; + + assert(param_count <= 32); + + for (i = 0; i < param_count; ++i) { + assert(params[i]); + param_types[i] = LLVMTypeOf(params[i]); + } + function_type = + LLVMFunctionType(return_type, param_types, param_count, 0); + function = LLVMAddFunction(ctx->module, name, function_type); + + LLVMSetFunctionCallConv(function, LLVMCCallConv); + LLVMSetLinkage(function, LLVMExternalLinkage); + + attrib_mask |= AC_FUNC_ATTR_NOUNWIND; + while (attrib_mask) { + enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); + ac_add_function_attr(function, -1, attr); + } + } + return LLVMBuildCall(ctx->builder, function, params, param_count, ""); +} + +LLVMValueRef +ac_build_gather_values_extended(struct ac_llvm_context *ctx, + LLVMValueRef *values, + unsigned value_count, + unsigned value_stride, + bool load) +{ + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef vec; + unsigned i; + + + if (value_count == 1) { + if (load) + return LLVMBuildLoad(builder, values[0], ""); + return values[0]; + } else if (!value_count) + unreachable("value_count is 0"); + + for (i = 0; i < value_count; i++) { + LLVMValueRef value = values[i * value_stride]; + if (load) + value = LLVMBuildLoad(builder, value, ""); + + if (!i) + vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count)); + LLVMValueRef index = 
LLVMConstInt(ctx->i32, i, false); + vec = LLVMBuildInsertElement(builder, vec, value, index, ""); + } + return vec; +} + +LLVMValueRef +ac_build_gather_values(struct ac_llvm_context *ctx, + LLVMValueRef *values, + unsigned value_count) +{ + return ac_build_gather_values_extended(ctx, values, value_count, 1, false); +} + +LLVMValueRef +ac_emit_fdiv(struct ac_llvm_context *ctx, + LLVMValueRef num, + LLVMValueRef den) +{ + LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, ""); + + if (!LLVMIsConstant(ret)) + LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp); + return ret; +} + +/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27 + * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is + * already multiplied by two. id is the cube face number. + */ +struct cube_selection_coords { + LLVMValueRef stc[2]; + LLVMValueRef ma; + LLVMValueRef id; +}; + +static void +build_cube_intrinsic(struct ac_llvm_context *ctx, + LLVMValueRef in[3], + struct cube_selection_coords *out) +{ + LLVMBuilderRef builder = ctx->builder; + + if (HAVE_LLVM >= 0x0309) { + LLVMTypeRef f32 = ctx->f32; + + out->stc[1] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc", + f32, in, 3, AC_FUNC_ATTR_READNONE); + out->stc[0] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc", + f32, in, 3, AC_FUNC_ATTR_READNONE); + out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema", + f32, in, 3, AC_FUNC_ATTR_READNONE); + out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid", + f32, in, 3, AC_FUNC_ATTR_READNONE); + } else { + LLVMValueRef c[4] = { + in[0], + in[1], + in[2], + LLVMGetUndef(LLVMTypeOf(in[0])) + }; + LLVMValueRef vec = ac_build_gather_values(ctx, c, 4); + + LLVMValueRef tmp = + ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube", + LLVMTypeOf(vec), &vec, 1, + AC_FUNC_ATTR_READNONE); + + out->stc[1] = LLVMBuildExtractElement(builder, tmp, + LLVMConstInt(ctx->i32, 0, 0), ""); + out->stc[0] = LLVMBuildExtractElement(builder, tmp, + 
LLVMConstInt(ctx->i32, 1, 0), ""); + out->ma = LLVMBuildExtractElement(builder, tmp, + LLVMConstInt(ctx->i32, 2, 0), ""); + out->id = LLVMBuildExtractElement(builder, tmp, + LLVMConstInt(ctx->i32, 3, 0), ""); + } +} + +/** + * Build a manual selection sequence for cube face sc/tc coordinates and + * major axis vector (multiplied by 2 for consistency) for the given + * vec3 \p coords, for the face implied by \p selcoords. + * + * For the major axis, we always adjust the sign to be in the direction of + * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards + * the selcoords major axis. + */ +static void build_cube_select(LLVMBuilderRef builder, + const struct cube_selection_coords *selcoords, + const LLVMValueRef *coords, + LLVMValueRef *out_st, + LLVMValueRef *out_ma) +{ + LLVMTypeRef f32 = LLVMTypeOf(coords[0]); + LLVMValueRef is_ma_positive; + LLVMValueRef sgn_ma; + LLVMValueRef is_ma_z, is_not_ma_z; + LLVMValueRef is_ma_y; + LLVMValueRef is_ma_x; + LLVMValueRef sgn; + LLVMValueRef tmp; + + is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, + selcoords->ma, LLVMConstReal(f32, 0.0), ""); + sgn_ma = LLVMBuildSelect(builder, is_ma_positive, + LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), ""); + + is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), ""); + is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); + is_ma_y = LLVMBuildAnd(builder, is_not_ma_z, + LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), ""); + is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), ""); + + /* Select sc */ + tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], ""); + sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0), + LLVMBuildSelect(builder, is_ma_x, sgn_ma, + LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); + out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); + + /* Select tc */ + tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); 
+ sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""), + LLVMConstReal(f32, -1.0), ""); + out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); + + /* Select ma */ + tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], + LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), ""); + sgn = LLVMBuildSelect(builder, is_ma_positive, + LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), ""); + *out_ma = LLVMBuildFMul(builder, tmp, sgn, ""); +} + +void +ac_prepare_cube_coords(struct ac_llvm_context *ctx, + bool is_deriv, bool is_array, + LLVMValueRef *coords_arg, + LLVMValueRef *derivs_arg) +{ + + LLVMBuilderRef builder = ctx->builder; + struct cube_selection_coords selcoords; + LLVMValueRef coords[3]; + LLVMValueRef invma; + + build_cube_intrinsic(ctx, coords_arg, &selcoords); + + invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32", + ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); + invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); + + for (int i = 0; i < 2; ++i) + coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, ""); + + coords[2] = selcoords.id; + + if (is_deriv && derivs_arg) { + LLVMValueRef derivs[4]; + int axis; + + /* Convert cube derivatives to 2D derivatives. */ + for (axis = 0; axis < 2; axis++) { + LLVMValueRef deriv_st[2]; + LLVMValueRef deriv_ma; + + /* Transform the derivative alongside the texture + * coordinate. Mathematically, the correct formula is + * as follows. Assume we're projecting onto the +Z face + * and denote by dx/dh the derivative of the (original) + * X texture coordinate with respect to horizontal + * window coordinates. The projection onto the +Z face + * plane is: + * + * f(x,z) = x/z + * + * Then df/dh = df/dx * dx/dh + df/dz * dz/dh + * = 1/z * dx/dh - x/z * 1/z * dz/dh. + * + * This motivates the implementation below. + * + * Whether this actually gives the expected results for + * apps that might feed in derivatives obtained via + * finite differences is anyone's guess. 
The OpenGL spec + * seems awfully quiet about how textureGrad for cube + * maps should be handled. + */ + build_cube_select(builder, &selcoords, &derivs_arg[axis * 3], + deriv_st, &deriv_ma); + + deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, ""); + + for (int i = 0; i < 2; ++i) + derivs[axis * 2 + i] = + LLVMBuildFSub(builder, + LLVMBuildFMul(builder, deriv_st[i], invma, ""), + LLVMBuildFMul(builder, deriv_ma, coords[i], ""), ""); + } + + memcpy(derivs_arg, derivs, sizeof(derivs)); + } + + /* Shift the texture coordinate. This must be applied after the + * derivative calculation. + */ + for (int i = 0; i < 2; ++i) + coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), ""); + + if (is_array) { + /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ + /* coords_arg.w component - array_index for cube arrays */ + LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), ""); + coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], ""); + } + + memcpy(coords_arg, coords, sizeof(coords)); +} diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index d9ea9bd185a..802c2662470 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -33,11 +33,68 @@ extern "C" { #endif +enum ac_func_attr { + AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0), + AC_FUNC_ATTR_BYVAL = (1 << 1), + AC_FUNC_ATTR_INREG = (1 << 2), + AC_FUNC_ATTR_NOALIAS = (1 << 3), + AC_FUNC_ATTR_NOUNWIND = (1 << 4), + AC_FUNC_ATTR_READNONE = (1 << 5), + AC_FUNC_ATTR_READONLY = (1 << 6), + AC_FUNC_ATTR_LAST = (1 << 7) +}; + +struct ac_llvm_context { + LLVMContextRef context; + LLVMModuleRef module; + LLVMBuilderRef builder; + + LLVMTypeRef i32; + LLVMTypeRef f32; + + unsigned fpmath_md_kind; + LLVMValueRef fpmath_md_2p5_ulp; +}; + LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family); void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); bool ac_is_sgpr_param(LLVMValueRef 
param); +void +ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context); + +void +ac_add_function_attr(LLVMValueRef function, + int attr_idx, + enum ac_func_attr attr); +LLVMValueRef +ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, + LLVMTypeRef return_type, LLVMValueRef *params, + unsigned param_count, unsigned attrib_mask); + +LLVMValueRef +ac_build_gather_values_extended(struct ac_llvm_context *ctx, + LLVMValueRef *values, + unsigned value_count, + unsigned value_stride, + bool load); +LLVMValueRef +ac_build_gather_values(struct ac_llvm_context *ctx, + LLVMValueRef *values, + unsigned value_count); + +LLVMValueRef +ac_emit_fdiv(struct ac_llvm_context *ctx, + LLVMValueRef num, + LLVMValueRef den); + +void +ac_prepare_cube_coords(struct ac_llvm_context *ctx, + bool is_deriv, bool is_array, + LLVMValueRef *coords_arg, + LLVMValueRef *derivs_arg); + #ifdef __cplusplus } #endif diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index bc3a3456220..ae21be4d8ff 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -51,6 +51,7 @@ enum desc_type { }; struct nir_to_llvm_context { + struct ac_llvm_context ac; const struct ac_nir_compiler_options *options; struct ac_shader_variant_info *shader_info; @@ -141,77 +142,6 @@ struct ac_tex_info { bool has_offset; }; -enum ac_func_attr { - AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0), - AC_FUNC_ATTR_BYVAL = (1 << 1), - AC_FUNC_ATTR_INREG = (1 << 2), - AC_FUNC_ATTR_NOALIAS = (1 << 3), - AC_FUNC_ATTR_NOUNWIND = (1 << 4), - AC_FUNC_ATTR_READNONE = (1 << 5), - AC_FUNC_ATTR_READONLY = (1 << 6), - AC_FUNC_ATTR_LAST = (1 << 7) -}; - -#if HAVE_LLVM < 0x0400 -static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr) -{ - switch (attr) { - case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute; - case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute; - case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute; - case AC_FUNC_ATTR_NOALIAS: 
return LLVMNoAliasAttribute; - case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute; - case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute; - case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute; - default: - fprintf(stderr, "Unhandled function attribute: %x\n", attr); - return 0; - } -} - -#else - -static const char *attr_to_str(enum ac_func_attr attr) -{ - switch (attr) { - case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; - case AC_FUNC_ATTR_BYVAL: return "byval"; - case AC_FUNC_ATTR_INREG: return "inreg"; - case AC_FUNC_ATTR_NOALIAS: return "noalias"; - case AC_FUNC_ATTR_NOUNWIND: return "nounwind"; - case AC_FUNC_ATTR_READNONE: return "readnone"; - case AC_FUNC_ATTR_READONLY: return "readonly"; - default: - fprintf(stderr, "Unhandled function attribute: %x\n", attr); - return 0; - } -} - -#endif - -static void -ac_add_function_attr(LLVMValueRef function, - int attr_idx, - enum ac_func_attr attr) -{ - -#if HAVE_LLVM < 0x0400 - LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr); - if (attr_idx == -1) { - LLVMAddFunctionAttr(function, llvm_attr); - } else { - LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); - } -#else - LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function)); - const char *attr_name = attr_to_str(attr); - unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, - strlen(attr_name)); - LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0); - LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); -#endif -} - static LLVMValueRef emit_llvm_intrinsic(struct nir_to_llvm_context *ctx, const char *name, LLVMTypeRef return_type, LLVMValueRef *params, @@ -3319,130 +3249,6 @@ static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx, *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK); } -static LLVMValueRef build_cube_intrinsic(struct nir_to_llvm_context *ctx, - LLVMValueRef *in) -{ - - LLVMValueRef v, cube_vec; - - if (1) { - LLVMTypeRef f32 = 
LLVMTypeOf(in[0]); - LLVMValueRef out[4]; - - out[0] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc", - f32, in, 3, AC_FUNC_ATTR_READNONE); - out[1] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc", - f32, in, 3, AC_FUNC_ATTR_READNONE); - out[2] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema", - f32, in, 3, AC_FUNC_ATTR_READNONE); - out[3] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid", - f32, in, 3, AC_FUNC_ATTR_READNONE); - - return build_gather_values(ctx, out, 4); - } else { - LLVMValueRef c[4]; - c[0] = in[0]; - c[1] = in[1]; - c[2] = in[2]; - c[3] = LLVMGetUndef(LLVMTypeOf(in[0])); - cube_vec = build_gather_values(ctx, c, 4); - v = emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube", LLVMTypeOf(cube_vec), - &cube_vec, 1, AC_FUNC_ATTR_READNONE); - } - return v; -} - -static void cube_to_2d_coords(struct nir_to_llvm_context *ctx, - LLVMValueRef *in, LLVMValueRef *out) -{ - LLVMValueRef coords[4]; - LLVMValueRef mad_args[3]; - LLVMValueRef v; - LLVMValueRef tmp; - int i; - - v = build_cube_intrinsic(ctx, in); - for (i = 0; i < 4; i++) - coords[i] = LLVMBuildExtractElement(ctx->builder, v, - LLVMConstInt(ctx->i32, i, false), ""); - - coords[2] = emit_llvm_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, - &coords[2], 1, AC_FUNC_ATTR_READNONE); - coords[2] = emit_fdiv(ctx, ctx->f32one, coords[2]); - - mad_args[1] = coords[2]; - mad_args[2] = LLVMConstReal(ctx->f32, 1.5); - mad_args[0] = coords[0]; - - /* emit MAD */ - tmp = LLVMBuildFMul(ctx->builder, mad_args[0], mad_args[1], ""); - coords[0] = LLVMBuildFAdd(ctx->builder, tmp, mad_args[2], ""); - - mad_args[0] = coords[1]; - - /* emit MAD */ - tmp = LLVMBuildFMul(ctx->builder, mad_args[0], mad_args[1], ""); - coords[1] = LLVMBuildFAdd(ctx->builder, tmp, mad_args[2], ""); - - /* apply xyz = yxw swizzle to cooords */ - out[0] = coords[1]; - out[1] = coords[0]; - out[2] = coords[3]; -} - -static void emit_prepare_cube_coords(struct nir_to_llvm_context *ctx, - LLVMValueRef *coords_arg, int num_coords, - bool is_deriv, - bool 
is_array, LLVMValueRef *derivs_arg) -{ - LLVMValueRef coords[4]; - int i; - cube_to_2d_coords(ctx, coords_arg, coords); - - if (is_deriv && derivs_arg) { - LLVMValueRef derivs[4]; - int axis; - - /* Convert cube derivatives to 2D derivatives. */ - for (axis = 0; axis < 2; axis++) { - LLVMValueRef shifted_cube_coords[4], shifted_coords[4]; - - /* Shift the cube coordinates by the derivatives to get - * the cube coordinates of the "neighboring pixel". - */ - for (i = 0; i < 3; i++) - shifted_cube_coords[i] = - LLVMBuildFAdd(ctx->builder, coords_arg[i], - derivs_arg[axis*3+i], ""); - shifted_cube_coords[3] = LLVMGetUndef(ctx->f32); - - /* Project the shifted cube coordinates onto the face. */ - cube_to_2d_coords(ctx, shifted_cube_coords, - shifted_coords); - - /* Subtract both sets of 2D coordinates to get 2D derivatives. - * This won't work if the shifted coordinates ended up - * in a different face. - */ - for (i = 0; i < 2; i++) - derivs[axis * 2 + i] = - LLVMBuildFSub(ctx->builder, shifted_coords[i], - coords[i], ""); - } - - memcpy(derivs_arg, derivs, sizeof(derivs)); - } - - if (is_array) { - /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ - /* coords_arg.w component - array_index for cube arrays */ - LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), ""); - coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], ""); - } - - memcpy(coords_arg, coords, sizeof(coords)); -} - static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) { LLVMValueRef result = NULL; @@ -3584,7 +3390,9 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) coords[chan] = to_float(ctx, coords[chan]); if (instr->coord_components == 3) coords[3] = LLVMGetUndef(ctx->f32); - emit_prepare_cube_coords(ctx, coords, instr->coord_components, instr->op == nir_texop_txd, instr->is_array, derivs); + ac_prepare_cube_coords(&ctx->ac, + instr->op == nir_texop_txd, instr->is_array, + coords, derivs); if 
(num_deriv_comp) num_deriv_comp--; } @@ -4694,6 +4502,9 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, ctx.context = LLVMContextCreate(); ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); + ac_llvm_context_init(&ctx.ac, ctx.context); + ctx.ac.module = ctx.module; + ctx.has_ds_bpermute = ctx.options->chip_class >= VI; memset(shader_info, 0, sizeof(*shader_info)); @@ -4702,6 +4513,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, setup_types(&ctx); ctx.builder = LLVMCreateBuilderInContext(ctx.context); + ctx.ac.builder = ctx.builder; ctx.stage = nir->stage; for (i = 0; i < AC_UD_MAX_SETS; i++) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 6f0f414cdad..c24d82ddaea 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4606,7 +4606,11 @@ static void tex_fetch_args( target == TGSI_TEXTURE_CUBE_ARRAY || target == TGSI_TEXTURE_SHADOWCUBE || target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) - si_prepare_cube_coords(bld_base, emit_data, coords, derivs); + ac_prepare_cube_coords(&ctx->ac, + opcode == TGSI_OPCODE_TXD, + target == TGSI_TEXTURE_CUBE_ARRAY || + target == TGSI_TEXTURE_SHADOWCUBE_ARRAY, + coords, derivs); if (opcode == TGSI_OPCODE_TXD) for (int i = 0; i < num_deriv_channels * 2; i++) diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 8d6a40b1644..6b3ac171f7a 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -28,6 +28,7 @@ #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_tgsi.h" #include "tgsi/tgsi_parse.h" +#include "ac_llvm_util.h" #include #include @@ -48,6 +49,7 @@ struct si_llvm_flow; struct si_shader_context { struct lp_build_tgsi_soa_context soa; struct gallivm_state gallivm; + struct ac_llvm_context ac; struct si_shader *shader; struct si_screen *screen; 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 3e0f7c4f769..8c8b4266e12 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -1266,6 +1266,10 @@ void si_llvm_context_init(struct si_shader_context *ctx, ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context, unsafe_fpmath); + ac_llvm_context_init(&ctx->ac, ctx->gallivm.context); + ctx->ac.module = ctx->gallivm.module; + ctx->ac.builder = ctx->gallivm.builder; + struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; bld_base->info = info; -- 2.30.2