LLVMValueRef *locals;
};
+static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx,
+ nir_deref_instr *deref_instr,
+ const nir_instr *instr,
+ bool image);
+
static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
nir_deref_instr *deref_instr,
enum ac_descriptor_type desc_type,
const nir_instr *instr,
+ LLVMValueRef index,
bool image, bool write);
static void
return result;
}
+struct waterfall_context {
+ LLVMBasicBlockRef phi_bb[2];
+ bool use_waterfall;
+};
+
+/* To deal with divergent descriptors we can create a loop that handles all
+ * lanes with the same descriptor on a given iteration (henceforth a
+ * waterfall loop).
+ *
+ * These helpers create the begin and end of the loop, leaving the caller
+ * to implement the body; see the usage sketch below.
+ *
+ * params:
+ * - ctx is the usual nir context
+ * - wctx is a temporary struct containing some loop info. Can be left uninitialized.
+ * - value is the possibly divergent value for which we build the loop
+ * - divergent is whether value is actually divergent. If false we just pass
+ * things through.
+ */
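+
+/* Usage sketch (not a verbatim caller; `index' and `result' stand in for
+ * whatever the caller actually computes):
+ *
+ *    struct waterfall_context wctx;
+ *    LLVMValueRef idx = enter_waterfall(ctx, &wctx, index, divergent);
+ *    ...emit the operation using the now-uniform idx...
+ *    result = exit_waterfall(ctx, &wctx, result);
+ */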
+static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx,
+ struct waterfall_context *wctx,
+ LLVMValueRef value, bool divergent)
+{
+ /* If the app claims the value is divergent but it is constant we can
+ * end up with a dynamic index of NULL. */
+ if (!value)
+ divergent = false;
+
+ wctx->use_waterfall = divergent;
+ if (!divergent)
+ return value;
+
+ ac_build_bgnloop(&ctx->ac, 6000);
+
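+ /* A NULL lane makes ac_build_readlane read from the first active lane. */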
+ LLVMValueRef scalar_value = ac_build_readlane(&ctx->ac, value, NULL);
+
+ LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, value,
+ scalar_value, "uniform_active");
+
+ wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
+ ac_build_ifcc(&ctx->ac, active, 6001);
+
+ return scalar_value;
+}
+
+static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx,
+ struct waterfall_context *wctx,
+ LLVMValueRef value)
+{
+ LLVMValueRef ret = NULL;
+ LLVMValueRef phi_src[2];
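+ /* Incoming values for the exit-condition phi: 0 when the lane skipped
+  * the body on this iteration, ~0 when it executed the body and may now
+  * leave the loop. */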
+ LLVMValueRef cc_phi_src[2] = {
+ LLVMConstInt(ctx->ac.i32, 0, false),
+ LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
+ };
+
+ if (!wctx->use_waterfall)
+ return value;
+
+ wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);
+
+ ac_build_endif(&ctx->ac, 6001);
+
+ if (value) {
+ phi_src[0] = LLVMGetUndef(LLVMTypeOf(value));
+ phi_src[1] = value;
+
+ ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb);
+ }
+
+ /*
+ * By using the optimization barrier on the exit decision, we decouple
+ * the operations from the break, and hence avoid LLVM hoisting the
+ * operation into the break block.
+ */
+ LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
+ ac_build_optimization_barrier(&ctx->ac, &cc);
+
+ LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");
+ ac_build_ifcc(&ctx->ac, active, 6002);
+ ac_build_break(&ctx->ac);
+ ac_build_endif(&ctx->ac, 6002);
+
+ ac_build_endloop(&ctx->ac, 6000);
+ return ret;
+}
+
static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
{
LLVMValueRef src[4], result = NULL;
return cache_policy;
}
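+
+/* Begin a waterfall over the SSBO index taken from `src' when the
+ * intrinsic's access flags include ACCESS_NON_UNIFORM. */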
+static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx,
+ struct waterfall_context *wctx,
+ const nir_intrinsic_instr *instr,
+ nir_src src)
+{
+ return enter_waterfall(ctx, wctx, get_src(ctx, src),
+ nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
+}
+
static void visit_store_ssbo(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
bool writeonly_memory = access & ACCESS_NON_READABLE;
unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
- LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
- get_src(ctx, instr->src[1]), true);
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
+
+ LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true);
LLVMValueRef base_data = src_data;
base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
}
}
+ exit_waterfall(ctx, &wctx, NULL);
+
if (ctx->ac.postponed_kill)
ac_build_endif(&ctx->ac, 7000);
}
}
static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+ nir_intrinsic_instr *instr)
{
if (ctx->ac.postponed_kill) {
LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
LLVMValueRef result;
int arg_count = 0;
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
+
switch (instr->intrinsic) {
case nir_intrinsic_ssbo_atomic_add:
op = "add";
}
descriptor = ctx->abi->load_ssbo(ctx->abi,
- get_src(ctx, instr->src[0]),
+ rsrc_base,
true);
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap &&
return_type == ctx->ac.i64) {
result = emit_ssbo_comp_swap_64(ctx, descriptor,
- get_src(ctx, instr->src[1]),
- get_src(ctx, instr->src[2]),
- get_src(ctx, instr->src[3]));
- if (ctx->ac.postponed_kill)
- ac_build_endif(&ctx->ac, 7001);
- return result;
- }
- if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
- params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
- }
- params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
- params[arg_count++] = descriptor;
+ get_src(ctx, instr->src[1]),
+ get_src(ctx, instr->src[2]),
+ get_src(ctx, instr->src[3]));
+ } else {
+ if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
+ params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
+ }
+ params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
+ params[arg_count++] = descriptor;
- if (LLVM_VERSION_MAJOR >= 9) {
- /* XXX: The new raw/struct atomic intrinsics are buggy with
- * LLVM 8, see r358579.
- */
- params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
- params[arg_count++] = ctx->ac.i32_0; /* soffset */
- params[arg_count++] = ctx->ac.i32_0; /* slc */
+ if (LLVM_VERSION_MAJOR >= 9) {
+ /* XXX: The new raw/struct atomic intrinsics are buggy with
+ * LLVM 8, see r358579.
+ */
+ params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+ params[arg_count++] = ctx->ac.i32_0; /* soffset */
+ params[arg_count++] = ctx->ac.i32_0; /* slc */
+
+ ac_build_type_name_for_intr(return_type, type, sizeof(type));
+ snprintf(name, sizeof(name),
+ "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
+ } else {
+ params[arg_count++] = ctx->ac.i32_0; /* vindex */
+ params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+ params[arg_count++] = ctx->ac.i1false; /* slc */
- ac_build_type_name_for_intr(return_type, type, sizeof(type));
- snprintf(name, sizeof(name),
- "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
- } else {
- params[arg_count++] = ctx->ac.i32_0; /* vindex */
- params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
- params[arg_count++] = ctx->ac.i1false; /* slc */
+ assert(return_type == ctx->ac.i32);
+ snprintf(name, sizeof(name),
+ "llvm.amdgcn.buffer.atomic.%s", op);
+ }
- assert(return_type == ctx->ac.i32);
- snprintf(name, sizeof(name),
- "llvm.amdgcn.buffer.atomic.%s", op);
+ result = ac_build_intrinsic(&ctx->ac, name, return_type, params,
+ arg_count, 0);
}
- result = ac_build_intrinsic(&ctx->ac, name, return_type, params,
- arg_count, 0);
- if (ctx->ac.postponed_kill)
+ result = exit_waterfall(ctx, &wctx, result);
+ if (ctx->ac.postponed_kill)
ac_build_endif(&ctx->ac, 7001);
return result;
}
static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+ nir_intrinsic_instr *instr)
{
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
+
int elem_size_bytes = instr->dest.ssa.bit_size / 8;
int num_components = instr->num_components;
enum gl_access_qualifier access = nir_intrinsic_access(instr);
unsigned cache_policy = get_cache_policy(ctx, access, false, false);
LLVMValueRef offset = get_src(ctx, instr->src[1]);
- LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
- get_src(ctx, instr->src[0]), false);
+ LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false);
LLVMValueRef vindex = ctx->ac.i32_0;
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
i += num_elems;
}
- return ac_build_gather_values(&ctx->ac, results, num_components);
+ LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components);
+ return exit_waterfall(ctx, &wctx, ret);
+}
+
+static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx,
+ struct waterfall_context *wctx,
+ const nir_intrinsic_instr *instr)
+{
+ return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]),
+ nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
}
static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+ nir_intrinsic_instr *instr)
{
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr);
+
LLVMValueRef ret;
- LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
+ LLVMValueRef rsrc = rsrc_base;
LLVMValueRef offset = get_src(ctx, instr->src[1]);
int num_components = instr->num_components;
ret = ac_trim_vector(&ctx->ac, ret, num_components);
}
- return LLVMBuildBitCast(ctx->ac.builder, ret,
+ ret = LLVMBuildBitCast(ctx->ac.builder, ret,
get_def_type(ctx, &instr->dest.ssa), "");
+
+ return exit_waterfall(ctx, &wctx, ret);
}
static void
static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr,
+ LLVMValueRef dynamic_index,
enum ac_descriptor_type desc_type,
bool write)
{
instr->src[0].ssa->parent_instr->type == nir_instr_type_deref ?
nir_instr_as_deref(instr->src[0].ssa->parent_instr) : NULL;
- return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, true, write);
+ return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, dynamic_index, true, write);
}
static void get_image_coords(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr,
+ LLVMValueRef dynamic_desc_index,
struct ac_image_args *args,
enum glsl_sampler_dim dim,
bool is_array)
fmask_load_address[2],
sample_index,
get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
- AC_DESC_FMASK, &instr->instr, true, false));
+ AC_DESC_FMASK, &instr->instr, dynamic_desc_index, true, false));
}
if (count == 1 && !gfx9_1d) {
if (instr->src[1].ssa->num_components)
static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr,
+ LLVMValueRef dynamic_index,
bool write, bool atomic)
{
- LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, write);
+ LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, write);
if (ctx->ac.chip_class == GFX9 && LLVM_VERSION_MAJOR < 9 && atomic) {
LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
return rsrc;
}
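+
+/* Waterfall over the image descriptor index derived from the intrinsic's
+ * deref when the access is flagged ACCESS_NON_UNIFORM. */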
+static LLVMValueRef enter_waterfall_image(struct ac_nir_context *ctx,
+ struct waterfall_context *wctx,
+ const nir_intrinsic_instr *instr)
+{
+ nir_deref_instr *deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true);
+ return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
+}
+
static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr,
bool bindless)
is_array = glsl_sampler_type_is_array(type);
}
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+
struct ac_image_args args = {};
args.cache_policy = get_cache_policy(ctx, access, false, false);
unsigned num_channels = util_last_bit(mask);
LLVMValueRef rsrc, vindex;
- rsrc = get_image_buffer_descriptor(ctx, instr, false, false);
+ rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, false, false);
vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
ctx->ac.i32_0, "");
bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
args.opcode = level_zero ? ac_image_load : ac_image_load_mip;
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
- get_image_coords(ctx, instr, &args, dim, is_array);
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
+ get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
if (!level_zero)
args.lod = get_src(ctx, instr->src[3]);
res = ac_build_image_opcode(&ctx->ac, &args);
}
- return res;
+ return exit_waterfall(ctx, &wctx, res);
}
static void visit_image_store(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr,
+ const nir_intrinsic_instr *instr,
bool bindless)
{
if (ctx->ac.postponed_kill) {
enum glsl_sampler_dim dim;
enum gl_access_qualifier access;
bool is_array;
+
if (bindless) {
dim = nir_intrinsic_image_dim(instr);
access = nir_intrinsic_access(instr);
is_array = glsl_sampler_type_is_array(type);
}
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+
bool writeonly_memory = access & ACCESS_NON_READABLE;
struct ac_image_args args = {};
args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
if (dim == GLSL_SAMPLER_DIM_BUF) {
- LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true, false);
+ LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, false);
LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
unsigned src_channels = ac_get_llvm_num_components(src);
LLVMValueRef vindex;
args.opcode = level_zero ? ac_image_store : ac_image_store_mip;
args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
- get_image_coords(ctx, instr, &args, dim, is_array);
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
+ get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
if (!level_zero)
args.lod = get_src(ctx, instr->src[4]);
ac_build_image_opcode(&ctx->ac, &args);
}
+ exit_waterfall(ctx, &wctx, NULL);
if (ctx->ac.postponed_kill)
ac_build_endif(&ctx->ac, 7003);
}
static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- bool bindless)
+ const nir_intrinsic_instr *instr,
+ bool bindless)
{
if (ctx->ac.postponed_kill) {
LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
is_array = glsl_sampler_type_is_array(type);
}
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+
switch (instr->intrinsic) {
case nir_intrinsic_bindless_image_atomic_add:
case nir_intrinsic_image_deref_atomic_add:
LLVMValueRef result;
if (dim == GLSL_SAMPLER_DIM_BUF) {
- params[param_count++] = get_image_buffer_descriptor(ctx, instr, true, true);
+ params[param_count++] = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, true);
params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
ctx->ac.i32_0, ""); /* vindex */
params[param_count++] = ctx->ac.i32_0; /* voffset */
args.data[0] = params[0];
if (cmpswap)
args.data[1] = params[1];
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
- get_image_coords(ctx, instr, &args, dim, is_array);
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
+ get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
result = ac_build_image_opcode(&ctx->ac, &args);
}
+ result = exit_waterfall(ctx, &wctx, result);
if (ctx->ac.postponed_kill)
ac_build_endif(&ctx->ac, 7004);
return result;
}
static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+ nir_intrinsic_instr *instr)
{
- LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+ LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
- return ac_build_image_get_sample_count(&ctx->ac, rsrc);
+ LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc);
+
+ return exit_waterfall(ctx, &wctx, ret);
}
static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
is_array = glsl_sampler_type_is_array(type);
}
- if (dim == GLSL_SAMPLER_DIM_BUF)
- return get_buffer_size(ctx, get_image_descriptor(ctx, instr, AC_DESC_BUFFER, false), true);
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
- struct ac_image_args args = { 0 };
+ if (dim == GLSL_SAMPLER_DIM_BUF) {
+ res = get_buffer_size(ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true);
+ } else {
- args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
- args.dmask = 0xf;
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
- args.opcode = ac_image_get_resinfo;
- args.lod = ctx->ac.i32_0;
- args.attributes = AC_FUNC_ATTR_READNONE;
+ struct ac_image_args args = { 0 };
- res = ac_build_image_opcode(&ctx->ac, &args);
+ args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+ args.dmask = 0xf;
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
+ args.opcode = ac_image_get_resinfo;
+ args.lod = ctx->ac.i32_0;
+ args.attributes = AC_FUNC_ATTR_READNONE;
- LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+ res = ac_build_image_opcode(&ctx->ac, &args);
- if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
- LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
- LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
- z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
- res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
- }
- if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
- LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
- res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
- ctx->ac.i32_1, "");
+ LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+
+ if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
+ LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
+ LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
+ z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
+ res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
+ }
+ if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
+ LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
+ res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
+ ctx->ac.i32_1, "");
+ }
}
- return res;
+ return exit_waterfall(ctx, &wctx, res);
}
static void emit_membar(struct ac_llvm_context *ac,
return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");
}
-static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
- nir_deref_instr *deref_instr,
- enum ac_descriptor_type desc_type,
- const nir_instr *instr,
- bool image, bool write)
+struct sampler_desc_address {
+ unsigned descriptor_set;
+ unsigned base_index; /* binding in Vulkan */
+ unsigned constant_index;
+ LLVMValueRef dynamic_index;
+ bool image;
+ bool bindless;
+};
+
+static struct sampler_desc_address
+get_sampler_desc_internal(struct ac_nir_context *ctx,
+ nir_deref_instr *deref_instr,
+ const nir_instr *instr,
+ bool image)
{
LLVMValueRef index = NULL;
unsigned constant_index = 0;
} else
base_index = deref_instr->var->data.binding;
}
+ return (struct sampler_desc_address) {
+ .descriptor_set = descriptor_set,
+ .base_index = base_index,
+ .constant_index = constant_index,
+ .dynamic_index = index,
+ .image = image,
+ .bindless = bindless,
+ };
+}
+
+/* Extract any possibly divergent index into a separate value that can be
+ * into get_sampler_desc with the same arguments. */
+static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx,
+ nir_deref_instr *deref_instr,
+ const nir_instr *instr,
+ bool image)
+{
+ struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
+ return addr.dynamic_index;
+}
+
+static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
+ nir_deref_instr *deref_instr,
+ enum ac_descriptor_type desc_type,
+ const nir_instr *instr,
+ LLVMValueRef index,
+ bool image, bool write)
+{
+ struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
return ctx->abi->load_sampler_desc(ctx->abi,
- descriptor_set,
- base_index,
- constant_index, index,
- desc_type, image, write, bindless);
+ addr.descriptor_set,
+ addr.base_index,
+ addr.constant_index, index,
+ desc_type, addr.image, write, addr.bindless);
}
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
static void tex_fetch_ptrs(struct ac_nir_context *ctx,
nir_tex_instr *instr,
+ struct waterfall_context *wctx,
LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
LLVMValueRef *fmask_ptr)
{
if (!sampler_deref_instr)
sampler_deref_instr = texture_deref_instr;
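+ /* The texture and sampler descriptors get separate waterfall contexts
+  * (wctx[0] and wctx[1]); when both come from the same deref the texture
+  * index is reused and only one loop is entered. */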
+ LLVMValueRef texture_dynamic_index = NULL, sampler_dynamic_index = NULL;
+ if (texture_deref_instr) {
+ texture_dynamic_index = get_sampler_desc_index(ctx, texture_deref_instr, &instr->instr, false);
+ texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, instr->texture_non_uniform);
+ }
+
+ if (sampler_deref_instr && sampler_deref_instr != texture_deref_instr) {
+ sampler_dynamic_index = get_sampler_desc_index(ctx, sampler_deref_instr, &instr->instr, false);
+ sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, instr->sampler_non_uniform);
+ } else {
+ sampler_dynamic_index = texture_dynamic_index;
+ }
+
enum ac_descriptor_type main_descriptor = instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
if (plane >= 0) {
main_descriptor = AC_DESC_FMASK;
}
- *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr, false, false);
+ *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr,
+ texture_dynamic_index, false, false);
if (samp_ptr) {
- *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr, false, false);
+ *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr,
+ sampler_dynamic_index, false, false);
if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
}
if (fmask_ptr && (instr->op == nir_texop_txf_ms ||
instr->op == nir_texop_samples_identical))
- *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, &instr->instr, false, false);
+ *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK,
+ &instr->instr, texture_dynamic_index, false, false);
}
static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
LLVMValueRef ddx = NULL, ddy = NULL;
unsigned offset_src = 0;
+ struct waterfall_context wctx[2] = {{{0}}};
- tex_fetch_ptrs(ctx, instr, &args.resource, &args.sampler, &fmask_ptr);
+ tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr);
for (unsigned i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
if (result) {
assert(instr->dest.is_ssa);
result = ac_to_integer(&ctx->ac, result);
+
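+ /* Exit the waterfall loops in reverse order of entry so they unwind
+  * correctly (the sampler loop is nested inside the texture loop). */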
+ for (int i = ARRAY_SIZE(wctx); --i >= 0;) {
+ result = exit_waterfall(ctx, wctx + i, result);
+ }
+
ctx->ssa_defs[instr->dest.ssa.index] = result;
}
}
-
static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
{
LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);