-static inline enum dpp_ctrl
-dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
-{
- assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
- return _dpp_quad_perm | lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6);
-}
-
-static inline enum dpp_ctrl
-dpp_row_sl(unsigned amount)
-{
- assert(amount > 0 && amount < 16);
- return _dpp_row_sl | amount;
-}
-
-static inline enum dpp_ctrl
-dpp_row_sr(unsigned amount)
-{
- assert(amount > 0 && amount < 16);
- return _dpp_row_sr | amount;
-}
-
-static LLVMValueRef
-_ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
- enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
- bool bound_ctrl)
-{
- return ac_build_intrinsic(ctx, "llvm.amdgcn.update.dpp.i32",
- LLVMTypeOf(old),
- (LLVMValueRef[]) {
- old, src,
- LLVMConstInt(ctx->i32, dpp_ctrl, 0),
- LLVMConstInt(ctx->i32, row_mask, 0),
- LLVMConstInt(ctx->i32, bank_mask, 0),
- LLVMConstInt(ctx->i1, bound_ctrl, 0) },
- 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-static LLVMValueRef
-ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
- enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
- bool bound_ctrl)
-{
- LLVMTypeRef src_type = LLVMTypeOf(src);
- src = ac_to_integer(ctx, src);
- old = ac_to_integer(ctx, old);
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
- LLVMValueRef ret;
- if (bits == 32) {
- ret = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask,
- bank_mask, bound_ctrl);
- } else {
- assert(bits % 32 == 0);
- LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
- LLVMValueRef src_vector =
- LLVMBuildBitCast(ctx->builder, src, vec_type, "");
- LLVMValueRef old_vector =
- LLVMBuildBitCast(ctx->builder, old, vec_type, "");
- ret = LLVMGetUndef(vec_type);
- for (unsigned i = 0; i < bits / 32; i++) {
- src = LLVMBuildExtractElement(ctx->builder, src_vector,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- old = LLVMBuildExtractElement(ctx->builder, old_vector,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- LLVMValueRef ret_comp = _ac_build_dpp(ctx, old, src,
- dpp_ctrl,
- row_mask,
- bank_mask,
- bound_ctrl);
- ret = LLVMBuildInsertElement(ctx->builder, ret,
- ret_comp,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- }
- }
- return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
-}
-
-static LLVMValueRef
-_ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
- bool exchange_rows, bool bound_ctrl)
-{
- LLVMValueRef args[6] = {
- src,
- src,
- LLVMConstInt(ctx->i32, sel, false),
- LLVMConstInt(ctx->i32, sel >> 32, false),
- ctx->i1true, /* fi */
- bound_ctrl ? ctx->i1true : ctx->i1false,
- };
- return ac_build_intrinsic(ctx, exchange_rows ? "llvm.amdgcn.permlanex16"
- : "llvm.amdgcn.permlane16",
- ctx->i32, args, 6,
- AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-static LLVMValueRef
-ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
- bool exchange_rows, bool bound_ctrl)
-{
- LLVMTypeRef src_type = LLVMTypeOf(src);
- src = ac_to_integer(ctx, src);
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
- LLVMValueRef ret;
- if (bits == 32) {
- ret = _ac_build_permlane16(ctx, src, sel, exchange_rows,
- bound_ctrl);
- } else {
- assert(bits % 32 == 0);
- LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
- LLVMValueRef src_vector =
- LLVMBuildBitCast(ctx->builder, src, vec_type, "");
- ret = LLVMGetUndef(vec_type);
- for (unsigned i = 0; i < bits / 32; i++) {
- src = LLVMBuildExtractElement(ctx->builder, src_vector,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- LLVMValueRef ret_comp =
- _ac_build_permlane16(ctx, src, sel,
- exchange_rows,
- bound_ctrl);
- ret = LLVMBuildInsertElement(ctx->builder, ret,
- ret_comp,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- }
- }
- return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
-}
-
-static inline unsigned
-ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
-{
- assert(and_mask < 32 && or_mask < 32 && xor_mask < 32);
- return and_mask | (or_mask << 5) | (xor_mask << 10);
-}
-
-static LLVMValueRef
-_ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
-{
- return ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle",
- LLVMTypeOf(src), (LLVMValueRef []) {
- src, LLVMConstInt(ctx->i32, mask, 0) },
- 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-LLVMValueRef
-ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
-{
- LLVMTypeRef src_type = LLVMTypeOf(src);
- src = ac_to_integer(ctx, src);
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
- LLVMValueRef ret;
- if (bits == 32) {
- ret = _ac_build_ds_swizzle(ctx, src, mask);
- } else {
- assert(bits % 32 == 0);
- LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
- LLVMValueRef src_vector =
- LLVMBuildBitCast(ctx->builder, src, vec_type, "");
- ret = LLVMGetUndef(vec_type);
- for (unsigned i = 0; i < bits / 32; i++) {
- src = LLVMBuildExtractElement(ctx->builder, src_vector,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- LLVMValueRef ret_comp = _ac_build_ds_swizzle(ctx, src,
- mask);
- ret = LLVMBuildInsertElement(ctx->builder, ret,
- ret_comp,
- LLVMConstInt(ctx->i32, i,
- 0), "");
- }
- }
- return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
-}
-
-static LLVMValueRef
-ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
-{
- char name[32], type[8];
- ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
- snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type);
- return ac_build_intrinsic(ctx, name, LLVMTypeOf(src),
- (LLVMValueRef []) { src }, 1,
- AC_FUNC_ATTR_READNONE);
-}
-
-static LLVMValueRef
-ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
- LLVMValueRef inactive)
-{
- char name[33], type[8];
- LLVMTypeRef src_type = LLVMTypeOf(src);
- src = ac_to_integer(ctx, src);
- inactive = ac_to_integer(ctx, inactive);
- ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
- snprintf(name, sizeof(name), "llvm.amdgcn.set.inactive.%s", type);
- LLVMValueRef ret =
- ac_build_intrinsic(ctx, name,
- LLVMTypeOf(src), (LLVMValueRef []) {
- src, inactive }, 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
- return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
-}
-
-static LLVMValueRef
-get_reduction_identity(struct ac_llvm_context *ctx, nir_op op, unsigned type_size)
-{
- if (type_size == 4) {
- switch (op) {
- case nir_op_iadd: return ctx->i32_0;
- case nir_op_fadd: return ctx->f32_0;
- case nir_op_imul: return ctx->i32_1;
- case nir_op_fmul: return ctx->f32_1;
- case nir_op_imin: return LLVMConstInt(ctx->i32, INT32_MAX, 0);
- case nir_op_umin: return LLVMConstInt(ctx->i32, UINT32_MAX, 0);
- case nir_op_fmin: return LLVMConstReal(ctx->f32, INFINITY);
- case nir_op_imax: return LLVMConstInt(ctx->i32, INT32_MIN, 0);
- case nir_op_umax: return ctx->i32_0;
- case nir_op_fmax: return LLVMConstReal(ctx->f32, -INFINITY);
- case nir_op_iand: return LLVMConstInt(ctx->i32, -1, 0);
- case nir_op_ior: return ctx->i32_0;
- case nir_op_ixor: return ctx->i32_0;
- default:
- unreachable("bad reduction intrinsic");
- }
- } else { /* type_size == 64bit */
- switch (op) {
- case nir_op_iadd: return ctx->i64_0;
- case nir_op_fadd: return ctx->f64_0;
- case nir_op_imul: return ctx->i64_1;
- case nir_op_fmul: return ctx->f64_1;
- case nir_op_imin: return LLVMConstInt(ctx->i64, INT64_MAX, 0);
- case nir_op_umin: return LLVMConstInt(ctx->i64, UINT64_MAX, 0);
- case nir_op_fmin: return LLVMConstReal(ctx->f64, INFINITY);
- case nir_op_imax: return LLVMConstInt(ctx->i64, INT64_MIN, 0);
- case nir_op_umax: return ctx->i64_0;
- case nir_op_fmax: return LLVMConstReal(ctx->f64, -INFINITY);
- case nir_op_iand: return LLVMConstInt(ctx->i64, -1, 0);
- case nir_op_ior: return ctx->i64_0;
- case nir_op_ixor: return ctx->i64_0;
- default:
- unreachable("bad reduction intrinsic");
- }
- }
-}
-
-static LLVMValueRef
-ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, nir_op op)
-{
- bool _64bit = ac_get_type_size(LLVMTypeOf(lhs)) == 8;
- switch (op) {
- case nir_op_iadd: return LLVMBuildAdd(ctx->builder, lhs, rhs, "");
- case nir_op_fadd: return LLVMBuildFAdd(ctx->builder, lhs, rhs, "");
- case nir_op_imul: return LLVMBuildMul(ctx->builder, lhs, rhs, "");
- case nir_op_fmul: return LLVMBuildFMul(ctx->builder, lhs, rhs, "");
- case nir_op_imin: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntSLT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_umin: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntULT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_fmin: return ac_build_intrinsic(ctx,
- _64bit ? "llvm.minnum.f64" : "llvm.minnum.f32",
- _64bit ? ctx->f64 : ctx->f32,
- (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE);
- case nir_op_imax: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntSGT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_umax: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntUGT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_fmax: return ac_build_intrinsic(ctx,
- _64bit ? "llvm.maxnum.f64" : "llvm.maxnum.f32",
- _64bit ? ctx->f64 : ctx->f32,
- (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE);
- case nir_op_iand: return LLVMBuildAnd(ctx->builder, lhs, rhs, "");
- case nir_op_ior: return LLVMBuildOr(ctx->builder, lhs, rhs, "");
- case nir_op_ixor: return LLVMBuildXor(ctx->builder, lhs, rhs, "");
- default:
- unreachable("bad reduction intrinsic");
- }
+/**
+ * Build the DPP control value for a quad permutation.
+ *
+ * Every lane selector must be below 4 (asserted). Selector N is placed at bit
+ * offset 2*N and the packed selectors are OR-ed with _dpp_quad_perm.
+ */
+static inline enum dpp_ctrl dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2,
+                                          unsigned lane3)
+{
+   assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
+
+   unsigned selectors = lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6);
+   return _dpp_quad_perm | selectors;
+}
+
+/**
+ * Combine \p amount with the _dpp_row_sl control base.
+ *
+ * \p amount must be in the range 1..15 (asserted).
+ */
+static inline enum dpp_ctrl dpp_row_sl(unsigned amount)
+{
+   assert(amount > 0 && amount < 16);
+
+   unsigned ctrl = _dpp_row_sl | amount;
+   return ctrl;
+}
+
+/**
+ * Combine \p amount with the _dpp_row_sr control base.
+ *
+ * \p amount must be in the range 1..15 (asserted).
+ */
+static inline enum dpp_ctrl dpp_row_sr(unsigned amount)
+{
+   assert(amount > 0 && amount < 16);
+
+   unsigned ctrl = _dpp_row_sr | amount;
+   return ctrl;
+}
+
+/**
+ * Emit a single llvm.amdgcn.update.dpp.i32 call.
+ *
+ * \p old and \p src are zero-extended to i32 for the call, and the result is
+ * truncated back to the type \p src had on entry. The control word, row mask,
+ * bank mask and bound_ctrl flag are passed as constant operands.
+ */
+static LLVMValueRef _ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
+                                  enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
+                                  bool bound_ctrl)
+{
+   LLVMTypeRef narrow_type = LLVMTypeOf(src);
+
+   /* Only the i32 form of the intrinsic is used, so widen both operands. */
+   LLVMValueRef old32 = LLVMBuildZExt(ctx->builder, old, ctx->i32, "");
+   LLVMValueRef src32 = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+
+   LLVMValueRef args[6] = {
+      old32,
+      src32,
+      LLVMConstInt(ctx->i32, dpp_ctrl, 0),
+      LLVMConstInt(ctx->i32, row_mask, 0),
+      LLVMConstInt(ctx->i32, bank_mask, 0),
+      LLVMConstInt(ctx->i1, bound_ctrl, 0),
+   };
+   LLVMValueRef wide = ac_build_intrinsic(ctx, "llvm.amdgcn.update.dpp.i32", ctx->i32, args, 6,
+                                          AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+   return LLVMBuildTrunc(ctx->builder, wide, narrow_type, "");
+}
+
+/**
+ * DPP wrapper that accepts values of any supported width.
+ *
+ * Both operands are converted with ac_to_integer(). A value of at most 32 bits
+ * is handed to _ac_build_dpp() directly. A wider value (its width must be a
+ * multiple of 32, asserted) is viewed as a vector of i32 components, each
+ * component goes through _ac_build_dpp(), and the results are reassembled.
+ *
+ * \return the result bitcast to the type \p src had on entry.
+ */
+static LLVMValueRef ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
+                                 enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
+                                 bool bound_ctrl)
+{
+   LLVMTypeRef orig_type = LLVMTypeOf(src);
+   LLVMValueRef result;
+
+   src = ac_to_integer(ctx, src);
+   old = ac_to_integer(ctx, old);
+
+   unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+   if (bits <= 32) {
+      result = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
+   } else {
+      assert(bits % 32 == 0);
+
+      const unsigned num_dwords = bits / 32;
+      LLVMTypeRef dwords_type = LLVMVectorType(ctx->i32, num_dwords);
+      LLVMValueRef src_dwords = LLVMBuildBitCast(ctx->builder, src, dwords_type, "");
+      LLVMValueRef old_dwords = LLVMBuildBitCast(ctx->builder, old, dwords_type, "");
+
+      result = LLVMGetUndef(dwords_type);
+      for (unsigned c = 0; c < num_dwords; c++) {
+         LLVMValueRef index = LLVMConstInt(ctx->i32, c, 0);
+         LLVMValueRef src_c = LLVMBuildExtractElement(ctx->builder, src_dwords, index, "");
+         LLVMValueRef old_c = LLVMBuildExtractElement(ctx->builder, old_dwords, index, "");
+         LLVMValueRef part =
+            _ac_build_dpp(ctx, old_c, src_c, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
+
+         result = LLVMBuildInsertElement(ctx->builder, result, part, index, "");
+      }
+   }
+
+   return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
+}
+
+/**
+ * Emit a single llvm.amdgcn.permlane16 call, or llvm.amdgcn.permlanex16 when
+ * \p exchange_rows is set.
+ *
+ * \p src is zero-extended to i32 and passed as both of the first two operands.
+ * \p sel and \p sel >> 32 are passed as two i32 constants, the "fi" operand is
+ * always true, and \p bound_ctrl selects the last operand. The result is
+ * truncated back to the type \p src had on entry.
+ */
+static LLVMValueRef _ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src,
+                                         uint64_t sel, bool exchange_rows, bool bound_ctrl)
+{
+   LLVMTypeRef narrow_type = LLVMTypeOf(src);
+   const char *intr_name =
+      exchange_rows ? "llvm.amdgcn.permlanex16" : "llvm.amdgcn.permlane16";
+
+   LLVMValueRef src32 = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+
+   LLVMValueRef args[6] = {
+      src32,
+      src32,
+      LLVMConstInt(ctx->i32, sel, false),
+      LLVMConstInt(ctx->i32, sel >> 32, false),
+      ctx->i1true, /* fi */
+      bound_ctrl ? ctx->i1true : ctx->i1false,
+   };
+   LLVMValueRef wide = ac_build_intrinsic(ctx, intr_name, ctx->i32, args, 6,
+                                          AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+   return LLVMBuildTrunc(ctx->builder, wide, narrow_type, "");
+}
+
+/**
+ * permlane16/permlanex16 wrapper that accepts values of any supported width.
+ *
+ * \p src is converted with ac_to_integer(). A value of at most 32 bits is
+ * handed to _ac_build_permlane16() directly. A wider value (its width must be
+ * a multiple of 32, asserted) is processed one i32 component at a time.
+ *
+ * \return the result bitcast to the type \p src had on entry.
+ */
+static LLVMValueRef ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
+                                        bool exchange_rows, bool bound_ctrl)
+{
+   LLVMTypeRef orig_type = LLVMTypeOf(src);
+   LLVMValueRef result;
+
+   src = ac_to_integer(ctx, src);
+
+   unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+   if (bits <= 32) {
+      result = _ac_build_permlane16(ctx, src, sel, exchange_rows, bound_ctrl);
+   } else {
+      assert(bits % 32 == 0);
+
+      const unsigned num_dwords = bits / 32;
+      LLVMTypeRef dwords_type = LLVMVectorType(ctx->i32, num_dwords);
+      LLVMValueRef src_dwords = LLVMBuildBitCast(ctx->builder, src, dwords_type, "");
+
+      result = LLVMGetUndef(dwords_type);
+      for (unsigned c = 0; c < num_dwords; c++) {
+         LLVMValueRef index = LLVMConstInt(ctx->i32, c, 0);
+         LLVMValueRef src_c = LLVMBuildExtractElement(ctx->builder, src_dwords, index, "");
+         LLVMValueRef part = _ac_build_permlane16(ctx, src_c, sel, exchange_rows, bound_ctrl);
+
+         result = LLVMBuildInsertElement(ctx->builder, result, part, index, "");
+      }
+   }
+
+   return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
+}
+
+/**
+ * Pack three masks into one swizzle pattern value: \p and_mask in bits 0-4,
+ * \p or_mask in bits 5-9 and \p xor_mask in bits 10-14.
+ *
+ * Every mask must be below 32 (asserted).
+ */
+static inline unsigned ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
+{
+   assert(and_mask < 32 && or_mask < 32 && xor_mask < 32);
+
+   unsigned pattern = and_mask;
+   pattern |= or_mask << 5;
+   pattern |= xor_mask << 10;
+   return pattern;
+}
+
+/**
+ * Emit a single llvm.amdgcn.ds.swizzle call with \p mask as a constant operand.
+ *
+ * \p src is zero-extended to i32 for the call and the result is truncated back
+ * to the type \p src had on entry.
+ */
+static LLVMValueRef _ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
+                                         unsigned mask)
+{
+   LLVMTypeRef narrow_type = LLVMTypeOf(src);
+
+   LLVMValueRef src32 = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+
+   LLVMValueRef args[2] = {
+      src32,
+      LLVMConstInt(ctx->i32, mask, 0),
+   };
+   LLVMValueRef wide = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle", ctx->i32, args, 2,
+                                          AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+   return LLVMBuildTrunc(ctx->builder, wide, narrow_type, "");
+}
+
+/**
+ * ds_swizzle wrapper that accepts values of any supported width.
+ *
+ * \p src is converted with ac_to_integer(). A value of at most 32 bits is
+ * handed to _ac_build_ds_swizzle() directly. A wider value (its width must be
+ * a multiple of 32, asserted) is processed one i32 component at a time with
+ * the same \p mask.
+ *
+ * \return the result bitcast to the type \p src had on entry.
+ */
+LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
+{
+   LLVMTypeRef orig_type = LLVMTypeOf(src);
+   LLVMValueRef result;
+
+   src = ac_to_integer(ctx, src);
+
+   unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+   if (bits <= 32) {
+      result = _ac_build_ds_swizzle(ctx, src, mask);
+   } else {
+      assert(bits % 32 == 0);
+
+      const unsigned num_dwords = bits / 32;
+      LLVMTypeRef dwords_type = LLVMVectorType(ctx->i32, num_dwords);
+      LLVMValueRef src_dwords = LLVMBuildBitCast(ctx->builder, src, dwords_type, "");
+
+      result = LLVMGetUndef(dwords_type);
+      for (unsigned c = 0; c < num_dwords; c++) {
+         LLVMValueRef index = LLVMConstInt(ctx->i32, c, 0);
+         LLVMValueRef src_c = LLVMBuildExtractElement(ctx->builder, src_dwords, index, "");
+         LLVMValueRef part = _ac_build_ds_swizzle(ctx, src_c, mask);
+
+         result = LLVMBuildInsertElement(ctx->builder, result, part, index, "");
+      }
+   }
+
+   return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
+}
+
+/**
+ * Emit llvm.amdgcn.wwm.<type> for \p src.
+ *
+ * \p src is converted with ac_to_integer(). When its element size is below
+ * 32 bits it is zero-extended to i32 for the call and the result is truncated
+ * back to the matching integer type afterwards.
+ *
+ * \return the result bitcast to the type \p src had on entry.
+ */
+static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
+{
+   char intr_name[32], type_name[8];
+   LLVMTypeRef orig_type = LLVMTypeOf(src);
+   const bool widen = ac_get_elem_bits(ctx, orig_type) < 32;
+
+   LLVMValueRef value = ac_to_integer(ctx, src);
+   if (widen)
+      value = LLVMBuildZExt(ctx->builder, value, ctx->i32, "");
+
+   /* The intrinsic name is suffixed with the (possibly widened) operand type. */
+   LLVMTypeRef call_type = LLVMTypeOf(value);
+   ac_build_type_name_for_intr(call_type, type_name, sizeof(type_name));
+   snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.wwm.%s", type_name);
+
+   LLVMValueRef args[1] = {value};
+   value = ac_build_intrinsic(ctx, intr_name, call_type, args, 1, AC_FUNC_ATTR_READNONE);
+
+   if (widen)
+      value = LLVMBuildTrunc(ctx->builder, value, ac_to_integer_type(ctx, orig_type), "");
+
+   return LLVMBuildBitCast(ctx->builder, value, orig_type, "");
+}
+
+/**
+ * Emit llvm.amdgcn.set.inactive.<type> with \p src and \p inactive as operands.
+ *
+ * Both operands are converted with ac_to_integer(). When the element size of
+ * \p src is below 32 bits, both are zero-extended to i32 for the call and the
+ * result is truncated back to the matching integer type afterwards.
+ *
+ * \return the result bitcast to the type \p src had on entry.
+ */
+static LLVMValueRef ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
+                                          LLVMValueRef inactive)
+{
+   char name[33], type[8];
+   LLVMTypeRef src_type = LLVMTypeOf(src);
+   unsigned bitsize = ac_get_elem_bits(ctx, src_type);
+   src = ac_to_integer(ctx, src);
+   inactive = ac_to_integer(ctx, inactive);
+
+   if (bitsize < 32) {
+      src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+      inactive = LLVMBuildZExt(ctx->builder, inactive, ctx->i32, "");
+   }
+
+   ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
+   snprintf(name, sizeof(name), "llvm.amdgcn.set.inactive.%s", type);
+   LLVMValueRef ret =
+      ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src, inactive}, 2,
+                         AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+   /* trunc is only defined between integer types, so narrow to the integer
+    * type that matches src_type instead of src_type itself, which is a float
+    * type when the caller passes a 16-bit float. */
+   if (bitsize < 32)
+      ret = LLVMBuildTrunc(ctx->builder, ret, ac_to_integer_type(ctx, src_type), "");
+
+   /* Hand the value back in the caller's original type, as ac_build_wwm() does. */
+   return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+}
+
+/**
+ * Return the identity constant of reduction \p op for operands that are
+ * \p type_size bytes wide.
+ *
+ * Sizes 1, 2 and 4 select the 8-, 16- and 32-bit constants; any other size is
+ * treated as 64-bit. Identities: 0 for iadd/umax/ior/ixor/fadd, 1 for
+ * imul/fmul, the signed/unsigned maximum for imin/umin, the signed minimum
+ * for imax, all ones for iand, +infinity for fmin and -infinity for fmax.
+ * The float ops are not available at size 1; they and any other op hit
+ * unreachable().
+ */
+static LLVMValueRef get_reduction_identity(struct ac_llvm_context *ctx, nir_op op,
+                                           unsigned type_size)
+{
+   LLVMTypeRef int_type, float_type;
+   LLVMValueRef int_zero, int_one, float_zero, float_one;
+   unsigned long long signed_min, signed_max, unsigned_max;
+
+   /* Pick the per-width building blocks once. */
+   switch (type_size) {
+   case 1:
+      int_type = ctx->i8;
+      int_zero = ctx->i8_0;
+      int_one = ctx->i8_1;
+      signed_min = INT8_MIN;
+      signed_max = INT8_MAX;
+      unsigned_max = UINT8_MAX;
+      /* No float reductions at this width. */
+      float_type = NULL;
+      float_zero = NULL;
+      float_one = NULL;
+      break;
+   case 2:
+      int_type = ctx->i16;
+      int_zero = ctx->i16_0;
+      int_one = ctx->i16_1;
+      signed_min = INT16_MIN;
+      signed_max = INT16_MAX;
+      unsigned_max = UINT16_MAX;
+      float_type = ctx->f16;
+      float_zero = ctx->f16_0;
+      float_one = ctx->f16_1;
+      break;
+   case 4:
+      int_type = ctx->i32;
+      int_zero = ctx->i32_0;
+      int_one = ctx->i32_1;
+      signed_min = INT32_MIN;
+      signed_max = INT32_MAX;
+      unsigned_max = UINT32_MAX;
+      float_type = ctx->f32;
+      float_zero = ctx->f32_0;
+      float_one = ctx->f32_1;
+      break;
+   default: /* type_size == 64bit */
+      int_type = ctx->i64;
+      int_zero = ctx->i64_0;
+      int_one = ctx->i64_1;
+      signed_min = INT64_MIN;
+      signed_max = INT64_MAX;
+      unsigned_max = UINT64_MAX;
+      float_type = ctx->f64;
+      float_zero = ctx->f64_0;
+      float_one = ctx->f64_1;
+      break;
+   }
+
+   /* Integer reductions exist at every width. */
+   switch (op) {
+   case nir_op_iadd:
+   case nir_op_umax:
+   case nir_op_ior:
+   case nir_op_ixor:
+      return int_zero;
+   case nir_op_imul:
+      return int_one;
+   case nir_op_imin:
+      return LLVMConstInt(int_type, signed_max, 0);
+   case nir_op_umin:
+      return LLVMConstInt(int_type, unsigned_max, 0);
+   case nir_op_imax:
+      return LLVMConstInt(int_type, signed_min, 0);
+   case nir_op_iand:
+      return LLVMConstInt(int_type, -1, 0);
+   default:
+      break;
+   }
+
+   /* Float reductions only where a float type of this width was selected. */
+   if (float_type) {
+      switch (op) {
+      case nir_op_fadd:
+         return float_zero;
+      case nir_op_fmul:
+         return float_one;
+      case nir_op_fmin:
+         return LLVMConstReal(float_type, INFINITY);
+      case nir_op_fmax:
+         return LLVMConstReal(float_type, -INFINITY);
+      default:
+         break;
+      }
+   }
+
+   unreachable("bad reduction intrinsic");
+}
+
+/**
+ * Emit the binary operation that corresponds to reduction \p op on \p lhs and
+ * \p rhs.
+ *
+ * Integer min/max are built as a compare followed by a select. Float min/max
+ * use the llvm.minnum / llvm.maxnum intrinsic whose suffix is picked from the
+ * byte size of \p lhs (8 -> f64, 4 -> f32, anything else -> f16). The other
+ * ops map to the matching LLVM arithmetic or bitwise instruction. Any op not
+ * listed hits unreachable().
+ */
+static LLVMValueRef ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs,
+                                    nir_op op)
+{
+   const unsigned bytes = ac_get_type_size(LLVMTypeOf(lhs));
+   LLVMIntPredicate take_lhs_pred;
+
+   switch (op) {
+   case nir_op_iadd:
+      return LLVMBuildAdd(ctx->builder, lhs, rhs, "");
+   case nir_op_fadd:
+      return LLVMBuildFAdd(ctx->builder, lhs, rhs, "");
+   case nir_op_imul:
+      return LLVMBuildMul(ctx->builder, lhs, rhs, "");
+   case nir_op_fmul:
+      return LLVMBuildFMul(ctx->builder, lhs, rhs, "");
+   case nir_op_iand:
+      return LLVMBuildAnd(ctx->builder, lhs, rhs, "");
+   case nir_op_ior:
+      return LLVMBuildOr(ctx->builder, lhs, rhs, "");
+   case nir_op_ixor:
+      return LLVMBuildXor(ctx->builder, lhs, rhs, "");
+
+   case nir_op_fmin:
+   case nir_op_fmax: {
+      const bool is_min = op == nir_op_fmin;
+      const char *intr_name;
+      LLVMTypeRef float_type;
+
+      if (bytes == 8) {
+         intr_name = is_min ? "llvm.minnum.f64" : "llvm.maxnum.f64";
+         float_type = ctx->f64;
+      } else if (bytes == 4) {
+         intr_name = is_min ? "llvm.minnum.f32" : "llvm.maxnum.f32";
+         float_type = ctx->f32;
+      } else {
+         intr_name = is_min ? "llvm.minnum.f16" : "llvm.maxnum.f16";
+         float_type = ctx->f16;
+      }
+
+      LLVMValueRef args[2] = {lhs, rhs};
+      return ac_build_intrinsic(ctx, intr_name, float_type, args, 2, AC_FUNC_ATTR_READNONE);
+   }
+
+   /* Integer min/max: choose the predicate under which lhs is the result. */
+   case nir_op_imin:
+      take_lhs_pred = LLVMIntSLT;
+      break;
+   case nir_op_umin:
+      take_lhs_pred = LLVMIntULT;
+      break;
+   case nir_op_imax:
+      take_lhs_pred = LLVMIntSGT;
+      break;
+   case nir_op_umax:
+      take_lhs_pred = LLVMIntUGT;
+      break;
+
+   default:
+      unreachable("bad reduction intrinsic");
+   }
+
+   LLVMValueRef take_lhs = LLVMBuildICmp(ctx->builder, take_lhs_pred, lhs, rhs, "");
+   return LLVMBuildSelect(ctx->builder, take_lhs, lhs, rhs, "");
+}
+
+/**
+ * \param src The value to shift.
+ * \param identity The value to use for the first lane.
+ * \param maxprefix specifies that the result only needs to be correct for a
+ * prefix of this many threads
+ * \return src, shifted 1 lane up, and identity shifted into lane 0.
+ */
+static LLVMValueRef ac_wavefront_shift_right_1(struct ac_llvm_context *ctx, LLVMValueRef src,
+ LLVMValueRef identity, unsigned maxprefix)
+{
+ if (ctx->chip_class >= GFX10) {
+ /* wavefront shift_right by 1 on GFX10 (emulate dpp_wf_sr1) */
+ LLVMValueRef active, tmp1, tmp2;
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+
+ tmp1 = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
+
+ tmp2 = ac_build_permlane16(ctx, src, (uint64_t)~0, true, false);
+
+ if (maxprefix > 32) {
+ active =
+ LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, false), "");
+
+ tmp2 = LLVMBuildSelect(ctx->builder, active,
+ ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, false)),
+ tmp2, "");
+
+ active = LLVMBuildOr(
+ ctx->builder, active,
+ LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, false), ""),
+ LLVMConstInt(ctx->i32, 0x10, false), ""),
+ "");
+ return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ } else if (maxprefix > 16) {
+ active =
+ LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 16, false), "");
+
+ return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ }
+ } else if (ctx->chip_class >= GFX8) {
+ return ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
+ }
+
+ /* wavefront shift_right by 1 on SI/CI */
+ LLVMValueRef active, tmp1, tmp2;
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+ tmp1 = ac_build_ds_swizzle(ctx, src, (1 << 15) | dpp_quad_perm(0, 0, 1, 2));
+ tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x18, 0x03, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x7, 0), ""),
+ LLVMConstInt(ctx->i32, 0x4, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x10, 0x07, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0xf, 0), ""),
+ LLVMConstInt(ctx->i32, 0x8, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x00, 0x0f, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, 0), ""),
+ LLVMConstInt(ctx->i32, 0x10, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ tmp2 = ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, 0));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 0, 0), "");
+ return LLVMBuildSelect(ctx->builder, active, identity, tmp1, "");