}
}
+/**
+ * Helper function that builds an LLVM IR PHI node and immediately adds
+ * incoming edges.
+ *
+ * The PHI node is created through ctx->builder with the given type and an
+ * empty name; values, blocks and count_incoming are then forwarded
+ * unchanged to a single LLVMAddIncoming call.
+ *
+ * ctx:            context whose builder is used to create the PHI node
+ * type:           LLVM type of the PHI node
+ * count_incoming: number of incoming edges to add
+ * values:         incoming values handed to LLVMAddIncoming
+ * blocks:         incoming basic blocks handed to LLVMAddIncoming
+ *
+ * Returns the newly built PHI node.
+ *
+ * NOTE(review): values and blocks are presumably parallel arrays with at
+ * least count_incoming entries each; no check is done here - confirm
+ * against callers.
+ */
+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+ unsigned count_incoming, LLVMValueRef *values,
+ LLVMBasicBlockRef *blocks)
+{
+ LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+ LLVMAddIncoming(phi, values, blocks, count_incoming);
+ return phi;
+}
+
/* Prevent optimizations (at least of memory accesses) across the current
* point in the program by emitting empty inline assembly that is marked as
* having side effects.
* selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
* the selcoords major axis.
*/
-static void build_cube_select(LLVMBuilderRef builder,
+static void build_cube_select(struct ac_llvm_context *ctx,
const struct cube_selection_coords *selcoords,
const LLVMValueRef *coords,
LLVMValueRef *out_st,
LLVMValueRef *out_ma)
{
+ LLVMBuilderRef builder = ctx->builder;
LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
LLVMValueRef is_ma_positive;
LLVMValueRef sgn_ma;
is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
/* Select sc */
- tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
+ tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
- LLVMBuildSelect(builder, is_ma_x, sgn_ma,
+ LLVMBuildSelect(builder, is_ma_z, sgn_ma,
LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
/* Select tc */
tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
- sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
+ sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
LLVMConstReal(f32, -1.0), "");
out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
/* Select ma */
tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
- sgn = LLVMBuildSelect(builder, is_ma_positive,
- LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
- *out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
+ tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
+ ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
+ *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
}
void
LLVMValueRef invma;
if (is_array && !is_lod) {
- coords_arg[3] = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32,
- &coords_arg[3], 1, 0);
+ LLVMValueRef tmp = coords_arg[3];
+ tmp = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &tmp, 1, 0);
+
+ /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
+ *
+ * "For Array forms, the array layer used will be
+ *
+ * max(0, min(d−1, floor(layer+0.5)))
+ *
+ * where d is the depth of the texture array and layer
+ * comes from the component indicated in the tables below.
+ * Workaround for an issue where the layer is taken from a
+ * helper invocation which happens to fall on a different
+ * layer due to extrapolation."
+ *
+ * VI and earlier attempt to implement this in hardware by
+ * clamping the value of coords[2] = (8 * layer) + face.
+ * Unfortunately, this means that we end up with the wrong
+ * face when clamping occurs.
+ *
+ * Clamp the layer earlier to work around the issue.
+ */
+ if (ctx->chip_class <= VI) {
+ LLVMValueRef ge0;
+ ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
+ tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
+ }
+
+ coords_arg[3] = tmp;
}
build_cube_intrinsic(ctx, coords_arg, &selcoords);
* seems awfully quiet about how textureGrad for cube
* maps should be handled.
*/
- build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
+ build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
deriv_st, &deriv_ma);
deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
*/
LLVMValueRef
ac_build_ddxy(struct ac_llvm_context *ctx,
- bool has_ds_bpermute,
uint32_t mask,
int idx,
LLVMValueRef val)
LLVMValueRef tl, trbl, args[2];
LLVMValueRef result;
- if (has_ds_bpermute) {
+ if (ctx->chip_class >= VI) {
LLVMValueRef thread_id, tl_tid, trbl_tid;
thread_id = ac_get_thread_id(ctx);
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
} else {
- uint32_t masks[2];
+ uint32_t masks[2] = {};
switch (mask) {
case AC_TID_MASK_TOP_LEFT:
masks[0] = 0x80a0;
masks[1] = 0x80f5;
break;
+ default:
+ assert(0);
}
args[0] = val;