LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
- unsigned cache_policy,
- bool swizzle_enable_hint)
+ unsigned cache_policy)
{
/* Split 3 channel stores, because only LLVM 9+ support 3-channel
* intrinsics. */
v01 = ac_build_gather_values(ctx, v, 2);
ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
- soffset, inst_offset, cache_policy,
- swizzle_enable_hint);
+ soffset, inst_offset, cache_policy);
ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
soffset, inst_offset + 8,
- cache_policy,
- swizzle_enable_hint);
+ cache_policy);
return;
}
* (voffset is swizzled, but soffset isn't swizzled).
* llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
*/
- if (!swizzle_enable_hint) {
+ if (!(cache_policy & ac_swizzled)) {
LLVMValueRef offset = soffset;
if (inst_offset)
_ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
bool exchange_rows, bool bound_ctrl)
{
+ LLVMTypeRef type = LLVMTypeOf(src);
+ LLVMValueRef result;
+
+ src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+
LLVMValueRef args[6] = {
src,
src,
ctx->i1true, /* fi */
bound_ctrl ? ctx->i1true : ctx->i1false,
};
- return ac_build_intrinsic(ctx, exchange_rows ? "llvm.amdgcn.permlanex16"
- : "llvm.amdgcn.permlane16",
- ctx->i32, args, 6,
- AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+ result = ac_build_intrinsic(ctx, exchange_rows ? "llvm.amdgcn.permlanex16"
+ : "llvm.amdgcn.permlane16",
+ ctx->i32, args, 6,
+ AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+ return LLVMBuildTrunc(ctx->builder, result, type, "");
}
static LLVMValueRef
src = ac_to_integer(ctx, src);
unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
LLVMValueRef ret;
- if (bits == 32) {
- ret = _ac_build_permlane16(ctx, src, sel, exchange_rows,
- bound_ctrl);
- } else {
+ if (bits > 32) {
assert(bits % 32 == 0);
LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
LLVMValueRef src_vector =
LLVMConstInt(ctx->i32, i,
0), "");
}
+ } else {
+ ret = _ac_build_permlane16(ctx, src, sel, exchange_rows,
+ bound_ctrl);
}
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
}
}
}
+/**
+ * \param src The value to shift.
+ * \param identity The value to use the first lane.
+ * \param maxprefix specifies that the result only needs to be correct for a
+ * prefix of this many threads
+ * \return src, shifted 1 lane up, and identity shifted into lane 0.
+ */
+static LLVMValueRef
+ac_wavefront_shift_right_1(struct ac_llvm_context *ctx, LLVMValueRef src,
+ LLVMValueRef identity, unsigned maxprefix)
+{
+ if (ctx->chip_class >= GFX10) {
+ /* wavefront shift_right by 1 on GFX10 (emulate dpp_wf_sr1) */
+ LLVMValueRef active, tmp1, tmp2;
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+
+ tmp1 = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
+
+ tmp2 = ac_build_permlane16(ctx, src, (uint64_t)~0, true, false);
+
+ if (maxprefix > 32) {
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid,
+ LLVMConstInt(ctx->i32, 32, false), "");
+
+ tmp2 = LLVMBuildSelect(ctx->builder, active,
+ ac_build_readlane(ctx, src,
+ LLVMConstInt(ctx->i32, 31, false)),
+ tmp2, "");
+
+ active = LLVMBuildOr(ctx->builder, active,
+ LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid,
+ LLVMConstInt(ctx->i32, 0x1f, false), ""),
+ LLVMConstInt(ctx->i32, 0x10, false), ""), "");
+ return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ } else if (maxprefix > 16) {
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid,
+ LLVMConstInt(ctx->i32, 16, false), "");
+
+ return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ }
+ } else if (ctx->chip_class >= GFX8) {
+ return ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
+ }
+
+ /* wavefront shift_right by 1 on SI/CI */
+ LLVMValueRef active, tmp1, tmp2;
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+ tmp1 = ac_build_ds_swizzle(ctx, src, (1 << 15) | dpp_quad_perm(0, 0, 1, 2));
+ tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x18, 0x03, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x7, 0), ""),
+ LLVMConstInt(ctx->i32, 0x4, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x10, 0x07, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0xf, 0), ""),
+ LLVMConstInt(ctx->i32, 0x8, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x00, 0x0f, 0x00));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, 0), ""),
+ LLVMConstInt(ctx->i32, 0x10, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ tmp2 = ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, 0));
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, 0), "");
+ tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+ active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 0, 0), "");
+ return LLVMBuildSelect(ctx->builder, active, identity, tmp1, "");
+}
+
/**
* \param maxprefix specifies that the result only needs to be correct for a
* prefix of this many threads
{
LLVMValueRef result, tmp;
- if (inclusive) {
- result = src;
- } else if (ctx->chip_class >= GFX10) {
- result = identity;
- } else if (ctx->chip_class >= GFX8) {
- src = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
- result = src;
- } else {
- /* wavefront shift_right by 1 on SI/CI */
- LLVMValueRef active, tmp1, tmp2;
- LLVMValueRef tid = ac_get_thread_id(ctx);
- tmp1 = ac_build_ds_swizzle(ctx, src, (1 << 15) | dpp_quad_perm(0, 0, 1, 2));
- tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x18, 0x03, 0x00));
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x7, 0), ""),
- LLVMConstInt(ctx->i32, 0x4, 0), "");
- tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
- tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x10, 0x07, 0x00));
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0xf, 0), ""),
- LLVMConstInt(ctx->i32, 0x8, 0), "");
- tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
- tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x00, 0x0f, 0x00));
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, 0), ""),
- LLVMConstInt(ctx->i32, 0x10, 0), "");
- tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
- tmp2 = ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, 0));
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, 0), "");
- tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
- active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 0, 0), "");
- src = LLVMBuildSelect(ctx->builder, active, identity, tmp1, "");
- result = src;
- }
+ if (!inclusive)
+ src = ac_wavefront_shift_right_1(ctx, src, identity, maxprefix);
+
+ result = src;
if (ctx->chip_class <= GFX7) {
assert(maxprefix == 64);
return result;
if (ctx->chip_class >= GFX10) {
- /* dpp_row_bcast{15,31} are not supported on gfx10. */
- LLVMBuilderRef builder = ctx->builder;
LLVMValueRef tid = ac_get_thread_id(ctx);
- LLVMValueRef cc;
- /* TODO-GFX10: Can we get better code-gen by putting this into
- * a branch so that LLVM generates EXEC mask manipulations? */
- if (inclusive)
- tmp = result;
- else
- tmp = ac_build_alu_op(ctx, result, src, op);
- tmp = ac_build_permlane16(ctx, tmp, ~(uint64_t)0, true, false);
- tmp = ac_build_alu_op(ctx, result, tmp, op);
- cc = LLVMBuildAnd(builder, tid, LLVMConstInt(ctx->i32, 16, false), "");
- cc = LLVMBuildICmp(builder, LLVMIntNE, cc, ctx->i32_0, "");
- result = LLVMBuildSelect(builder, cc, tmp, result, "");
+ LLVMValueRef active;
+
+ tmp = ac_build_permlane16(ctx, result, ~(uint64_t)0, true, false);
+
+ active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+ LLVMBuildAnd(ctx->builder, tid,
+ LLVMConstInt(ctx->i32, 16, false), ""),
+ ctx->i32_0, "");
+
+ tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+
+ result = ac_build_alu_op(ctx, result, tmp, op);
+
if (maxprefix <= 32)
return result;
- if (inclusive)
- tmp = result;
- else
- tmp = ac_build_alu_op(ctx, result, src, op);
- tmp = ac_build_readlane(ctx, tmp, LLVMConstInt(ctx->i32, 31, false));
- tmp = ac_build_alu_op(ctx, result, tmp, op);
- cc = LLVMBuildICmp(builder, LLVMIntUGE, tid,
- LLVMConstInt(ctx->i32, 32, false), "");
- result = LLVMBuildSelect(builder, cc, tmp, result, "");
+ tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
+
+ active = LLVMBuildICmp(ctx->builder, LLVMIntUGE, tid,
+ LLVMConstInt(ctx->i32, 32, false), "");
+
+ tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+
+ result = ac_build_alu_op(ctx, result, tmp, op);
return result;
}
} else if (bitsize == 32) {
intr = "llvm.canonicalize.f32";
type = ctx->f32;
- } else if (bitsize == 64) {
+ } else {
intr = "llvm.canonicalize.f64";
type = ctx->f64;
}
args->enabled_channels = mask;
}
+static LLVMTypeRef
+arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx)
+{
+ if (type == AC_ARG_FLOAT) {
+ return size == 1 ? ctx->f32 : LLVMVectorType(ctx->f32, size);
+ } else if (type == AC_ARG_INT) {
+ return size == 1 ? ctx->i32 : LLVMVectorType(ctx->i32, size);
+ } else {
+ LLVMTypeRef ptr_type;
+ switch (type) {
+ case AC_ARG_CONST_PTR:
+ ptr_type = ctx->i8;
+ break;
+ case AC_ARG_CONST_FLOAT_PTR:
+ ptr_type = ctx->f32;
+ break;
+ case AC_ARG_CONST_PTR_PTR:
+ ptr_type = ac_array_in_const32_addr_space(ctx->i8);
+ break;
+ case AC_ARG_CONST_DESC_PTR:
+ ptr_type = ctx->v4i32;
+ break;
+ case AC_ARG_CONST_IMAGE_PTR:
+ ptr_type = ctx->v8i32;
+ break;
+ default:
+ unreachable("unknown arg type");
+ }
+ if (size == 1) {
+ return ac_array_in_const32_addr_space(ptr_type);
+ } else {
+ assert(size == 2);
+ return ac_array_in_const_addr_space(ptr_type);
+ }
+ }
+}
+
+LLVMValueRef
+ac_build_main(const struct ac_shader_args *args,
+ struct ac_llvm_context *ctx,
+ enum ac_llvm_calling_convention convention,
+ const char *name, LLVMTypeRef ret_type,
+ LLVMModuleRef module)
+{
+ LLVMTypeRef arg_types[AC_MAX_ARGS];
+
+ for (unsigned i = 0; i < args->arg_count; i++) {
+ arg_types[i] = arg_llvm_type(args->args[i].type,
+ args->args[i].size, ctx);
+ }
+
+ LLVMTypeRef main_function_type =
+ LLVMFunctionType(ret_type, arg_types, args->arg_count, 0);
+
+ LLVMValueRef main_function =
+ LLVMAddFunction(module, name, main_function_type);
+ LLVMBasicBlockRef main_function_body =
+ LLVMAppendBasicBlockInContext(ctx->context, main_function, "main_body");
+ LLVMPositionBuilderAtEnd(ctx->builder, main_function_body);
+
+ LLVMSetFunctionCallConv(main_function, convention);
+ for (unsigned i = 0; i < args->arg_count; ++i) {
+ LLVMValueRef P = LLVMGetParam(main_function, i);
+
+ if (args->args[i].file != AC_ARG_SGPR)
+ continue;
+
+ ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_INREG);
+
+ if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
+ ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
+ ac_add_attr_dereferenceable(P, UINT64_MAX);
+ }
+ }
+
+ ctx->main_function = main_function;
+ return main_function;
+}
+