struct hash_table *defs;
struct hash_table *phis;
struct hash_table *vars;
+ struct hash_table *verified_interp;
LLVMValueRef main_function;
LLVMBasicBlockRef continue_block;
LLVMValueRef *locals;
};
+static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx,
+ nir_deref_instr *deref_instr,
+ const nir_instr *instr,
+ bool image);
+
static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
nir_deref_instr *deref_instr,
enum ac_descriptor_type desc_type,
const nir_instr *instr,
+ LLVMValueRef index,
bool image, bool write);
static void
LLVMIntPredicate pred, LLVMValueRef src0,
LLVMValueRef src1)
{
+ LLVMTypeRef src0_type = LLVMTypeOf(src0);
+ LLVMTypeRef src1_type = LLVMTypeOf(src1);
+
+ if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
+ src1 = LLVMBuildIntToPtr(ctx->builder, src1, src0_type, "");
+ } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) {
+ src0 = LLVMBuildIntToPtr(ctx->builder, src0, src1_type, "");
+ }
+
LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
return LLVMBuildSelect(ctx->builder, result,
LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
LLVMTypeRef result_type,
LLVMValueRef src0)
{
- char name[64];
+ char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
};
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
- ac_get_elem_bits(ctx, result_type));
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
}
LLVMTypeRef result_type,
LLVMValueRef src0, LLVMValueRef src1)
{
- char name[64];
+ char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
ac_to_float(ctx, src1),
};
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
- ac_get_elem_bits(ctx, result_type));
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
}
LLVMTypeRef result_type,
LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
- char name[64];
+ char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
ac_to_float(ctx, src1),
ac_to_float(ctx, src2),
};
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
- ac_get_elem_bits(ctx, result_type));
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
}
return result;
}
+/* State shared between enter_waterfall() and exit_waterfall(). */
+struct waterfall_context {
+ /* Predecessors for the exit phis: [0] = block before the "active" if
+  * (reached by lanes skipping this iteration), [1] = last block of the
+  * if body (lanes handled this iteration). */
+ LLVMBasicBlockRef phi_bb[2];
+ /* False when the value was uniform (or NULL) and no loop was emitted;
+  * exit_waterfall() then just passes the value through. */
+ bool use_waterfall;
+};
+
+/* To deal with divergent descriptors we can create a loop that handles all
+ * lanes with the same descriptor on a given iteration (henceforth a
+ * waterfall loop).
+ *
+ * These helpers create the begin and end of the loop, leaving the caller
+ * to implement the body.
+ *
+ * params:
+ * - ctx is the usual nir context
+ * - wctx is a temporary struct containing some loop info. Can be left uninitialized.
+ * - value is the possibly divergent value for which we build the loop
+ * - divergent is whether value is actually divergent. If false we just pass
+ * things through.
+ */
+/* Open a waterfall loop around a possibly-divergent value and return the
+ * per-iteration uniform value. Must be paired with exit_waterfall(). */
+static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx,
+ struct waterfall_context *wctx,
+ LLVMValueRef value, bool divergent)
+{
+ /* If the app claims the value is divergent but it is constant we can
+ * end up with a dynamic index of NULL. */
+ if (!value)
+ divergent = false;
+
+ wctx->use_waterfall = divergent;
+ if (!divergent)
+ return value;
+
+ ac_build_bgnloop(&ctx->ac, 6000);
+
+ /* Broadcast one lane's value; this is the uniform value handled on this
+ * loop iteration. */
+ LLVMValueRef scalar_value = ac_build_readlane(&ctx->ac, value, NULL);
+
+ /* Lanes whose value matches the broadcast value participate in this
+ * iteration; the rest wait for a later one. */
+ LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, value,
+ scalar_value, "uniform_active");
+
+ /* Record the block feeding the inactive-lane edge of the exit phis. */
+ wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
+ ac_build_ifcc(&ctx->ac, active, 6001);
+
+ return scalar_value;
+}
+
+/* Close a waterfall loop opened by enter_waterfall(). 'value' is the result
+ * computed inside the loop body (may be NULL if there is none); returns the
+ * value merged across iterations via a phi, or 'value' unchanged when no
+ * loop was emitted. */
+static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx,
+ struct waterfall_context *wctx,
+ LLVMValueRef value)
+{
+ LLVMValueRef ret = NULL;
+ LLVMValueRef phi_src[2];
+ /* 0 on the inactive edge, ~0 on the active edge: becomes the
+ * "did this lane run the body" exit condition below. */
+ LLVMValueRef cc_phi_src[2] = {
+ LLVMConstInt(ctx->ac.i32, 0, false),
+ LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
+ };
+
+ if (!wctx->use_waterfall)
+ return value;
+
+ /* Record the block feeding the active-lane edge of the exit phis. */
+ wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);
+
+ ac_build_endif(&ctx->ac, 6001);
+
+ if (value) {
+ /* Inactive lanes contribute undef; only lanes that executed the body
+ * produced a real value. */
+ phi_src[0] = LLVMGetUndef(LLVMTypeOf(value));
+ phi_src[1] = value;
+
+ ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb);
+ }
+
+ /*
+ * By using the optimization barrier on the exit decision, we decouple
+ * the operations from the break, and hence avoid LLVM hoisting the
+ * operation into the break block.
+ */
+ LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
+ ac_build_optimization_barrier(&ctx->ac, &cc);
+
+ /* Lanes that ran the body this iteration break out of the loop. */
+ LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");
+ ac_build_ifcc(&ctx->ac, active, 6002);
+ ac_build_break(&ctx->ac);
+ ac_build_endif(&ctx->ac, 6002);
+
+ ac_build_endloop(&ctx->ac, 6000);
+ return ret;
+}
+
static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
{
LLVMValueRef src[4], result = NULL;
unsigned num_components = instr->dest.dest.ssa.num_components;
unsigned src_components;
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
+ bool saved_inexact = false;
+
+ if (instr->exact)
+ saved_inexact = ac_disable_inexact_math(ctx->ac.builder);
assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
switch (instr->op) {
result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_frcp:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(src[0]), 1.0), src[0]);
+ /* For doubles, we need precise division to pass GLCTS. */
+ if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
+ ac_get_type_size(def_type) == 8) {
+ result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1,
+ ac_to_float(&ctx->ac, src[0]), "");
+ } else {
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ }
break;
case nir_op_iand:
result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_frsq:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result);
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_frexp_exp:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
case nir_op_f2f16_rtz:
+ case nir_op_f2f16:
+ case nir_op_f2fmp:
src[0] = ac_to_float(&ctx->ac, src[0]);
- if (LLVMTypeOf(src[0]) == ctx->ac.f64)
- src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
- LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
- result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
- result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+
+ /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
+ * all f32->f16 conversions have to round towards zero, because both scalar
+ * and vec2 down-conversions have to round equally.
+ */
+ if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL ||
+ instr->op == nir_op_f2f16_rtz) {
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+
+ if (LLVMTypeOf(src[0]) == ctx->ac.f64)
+ src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
+
+ /* Fast path conversion. This only works if NIR is vectorized
+ * to vec2 16.
+ */
+ if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
+ LLVMValueRef args[] = {
+ ac_llvm_extract_elem(&ctx->ac, src[0], 0),
+ ac_llvm_extract_elem(&ctx->ac, src[0], 1),
+ };
+ result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
+ break;
+ }
+
+ assert(ac_get_llvm_num_components(src[0]) == 1);
+ LLVMValueRef param[2] = { src[0], LLVMGetUndef(ctx->ac.f32) };
+ result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+ } else {
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
+ result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ else
+ result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ }
break;
case nir_op_f2f16_rtne:
- case nir_op_f2f16:
case nir_op_f2f32:
case nir_op_f2f64:
src[0] = ac_to_float(&ctx->ac, src[0]);
break;
case nir_op_u2u8:
case nir_op_u2u16:
+ case nir_op_u2ump:
case nir_op_u2u32:
case nir_op_u2u64:
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
break;
case nir_op_i2i8:
case nir_op_i2i16:
+ case nir_op_i2imp:
case nir_op_i2i32:
case nir_op_i2i64:
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
result = ac_to_integer_or_pointer(&ctx->ac, result);
ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
}
+
+ if (instr->exact)
+ ac_restore_inexact_math(ctx->ac.builder, saved_inexact);
}
static void visit_load_const(struct ac_nir_context *ctx,
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+ assert(instr->dest.is_ssa);
return ac_build_buffer_load_format(&ctx->ac,
args->resource,
args->coords[0],
ctx->ac.i32_0,
util_last_bit(mask),
- 0, true);
+ 0, true,
+ instr->dest.ssa.bit_size == 16);
}
args->opcode = ac_image_sample;
break;
case nir_texop_tg4:
args->opcode = ac_image_gather4;
- args->level_zero = true;
+ if (!args->lod && !args->bias)
+ args->level_zero = true;
break;
case nir_texop_lod:
args->opcode = ac_image_get_lod;
break;
+ case nir_texop_fragment_fetch:
+ case nir_texop_fragment_mask_fetch:
+ args->opcode = ac_image_load;
+ args->level_zero = false;
+ break;
default:
break;
}
if (instr->dest.ssa.bit_size == 8) {
unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
- LLVMTypeRef vec_type = LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords);
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
LLVMValueRef params[3];
if (load_dwords > 1) {
- LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), "");
+ LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
} else {
res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
if (instr->dest.ssa.num_components > 1)
- res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), instr->dest.ssa.num_components), "");
+ res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
return res;
} else if (instr->dest.ssa.bit_size == 16) {
unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
- LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
}
if (access & ACCESS_STREAM_CACHE_POLICY)
- cache_policy |= ac_slc;
+ cache_policy |= ac_slc | ac_glc;
return cache_policy;
}
+/* Waterfall-loop entry for an SSBO access: loops over the buffer index in
+ * 'src' only when the intrinsic is marked ACCESS_NON_UNIFORM. */
+static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx,
+ struct waterfall_context *wctx,
+ const nir_intrinsic_instr *instr,
+ nir_src src)
+{
+ return enter_waterfall(ctx, wctx, get_src(ctx, src),
+ nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
+}
+
static void visit_store_ssbo(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
+ ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7000);
+ }
+
LLVMValueRef src_data = get_src(ctx, instr->src[0]);
int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
unsigned writemask = nir_intrinsic_write_mask(instr);
bool writeonly_memory = access & ACCESS_NON_READABLE;
unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
- LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
- get_src(ctx, instr->src[1]), true);
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
+
+ LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true);
LLVMValueRef base_data = src_data;
base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
count = 1;
num_bytes = 2;
}
+
+ /* Due to alignment issues, split stores of 8-bit/16-bit
+ * vectors.
+ */
+ if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
+ writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
+ count = 1;
+ num_bytes = elem_size_bytes;
+ }
+
data = extract_vector_range(&ctx->ac, base_data, start, count);
offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
cache_policy);
}
}
+
+ exit_waterfall(ctx, &wctx, NULL);
+
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7000);
}
static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
}
static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+ nir_intrinsic_instr *instr)
{
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
+ ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7001);
+ }
+
LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));
const char *op;
char name[64], type[8];
LLVMValueRef params[6], descriptor;
+ LLVMValueRef result;
int arg_count = 0;
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
+
switch (instr->intrinsic) {
case nir_intrinsic_ssbo_atomic_add:
op = "add";
}
descriptor = ctx->abi->load_ssbo(ctx->abi,
- get_src(ctx, instr->src[0]),
+ rsrc_base,
true);
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap &&
return_type == ctx->ac.i64) {
- return emit_ssbo_comp_swap_64(ctx, descriptor,
- get_src(ctx, instr->src[1]),
- get_src(ctx, instr->src[2]),
- get_src(ctx, instr->src[3]));
- }
- if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
- params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
- }
- params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
- params[arg_count++] = descriptor;
+ result = emit_ssbo_comp_swap_64(ctx, descriptor,
+ get_src(ctx, instr->src[1]),
+ get_src(ctx, instr->src[2]),
+ get_src(ctx, instr->src[3]));
+ } else {
+ if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
+ params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
+ }
+ params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
+ params[arg_count++] = descriptor;
- if (LLVM_VERSION_MAJOR >= 9) {
- /* XXX: The new raw/struct atomic intrinsics are buggy with
- * LLVM 8, see r358579.
- */
- params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
- params[arg_count++] = ctx->ac.i32_0; /* soffset */
- params[arg_count++] = ctx->ac.i32_0; /* slc */
+ if (LLVM_VERSION_MAJOR >= 9) {
+ /* XXX: The new raw/struct atomic intrinsics are buggy with
+ * LLVM 8, see r358579.
+ */
+ params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+ params[arg_count++] = ctx->ac.i32_0; /* soffset */
+ params[arg_count++] = ctx->ac.i32_0; /* slc */
+
+ ac_build_type_name_for_intr(return_type, type, sizeof(type));
+ snprintf(name, sizeof(name),
+ "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
+ } else {
+ params[arg_count++] = ctx->ac.i32_0; /* vindex */
+ params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+ params[arg_count++] = ctx->ac.i1false; /* slc */
- ac_build_type_name_for_intr(return_type, type, sizeof(type));
- snprintf(name, sizeof(name),
- "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
- } else {
- params[arg_count++] = ctx->ac.i32_0; /* vindex */
- params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
- params[arg_count++] = ctx->ac.i1false; /* slc */
+ assert(return_type == ctx->ac.i32);
+ snprintf(name, sizeof(name),
+ "llvm.amdgcn.buffer.atomic.%s", op);
+ }
- assert(return_type == ctx->ac.i32);
- snprintf(name, sizeof(name),
- "llvm.amdgcn.buffer.atomic.%s", op);
+ result = ac_build_intrinsic(&ctx->ac, name, return_type, params,
+ arg_count, 0);
}
- return ac_build_intrinsic(&ctx->ac, name, return_type, params,
- arg_count, 0);
+ result = exit_waterfall(ctx, &wctx, result);
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7001);
+ return result;
}
static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+ nir_intrinsic_instr *instr)
{
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
+
int elem_size_bytes = instr->dest.ssa.bit_size / 8;
int num_components = instr->num_components;
enum gl_access_qualifier access = nir_intrinsic_access(instr);
unsigned cache_policy = get_cache_policy(ctx, access, false, false);
LLVMValueRef offset = get_src(ctx, instr->src[1]);
- LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
- get_src(ctx, instr->src[0]), false);
+ LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false);
LLVMValueRef vindex = ctx->ac.i32_0;
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
i += num_elems;
}
- return ac_build_gather_values(&ctx->ac, results, num_components);
+ LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components);
+ return exit_waterfall(ctx, &wctx, ret);
+}
+
+/* Waterfall-loop entry for a UBO access: loops over the buffer index in
+ * src[0] only when the intrinsic is marked ACCESS_NON_UNIFORM. */
+static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx,
+ struct waterfall_context *wctx,
+ const nir_intrinsic_instr *instr)
+{
+ return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]),
+ nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
}
static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+ nir_intrinsic_instr *instr)
{
+ struct waterfall_context wctx;
+ LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr);
+
LLVMValueRef ret;
- LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
+ LLVMValueRef rsrc = rsrc_base;
LLVMValueRef offset = get_src(ctx, instr->src[1]);
int num_components = instr->num_components;
ret = ac_trim_vector(&ctx->ac, ret, num_components);
}
- return LLVMBuildBitCast(ctx->ac.builder, ret,
+ ret = LLVMBuildBitCast(ctx->ac.builder, ret,
get_def_type(ctx, &instr->dest.ssa), "");
+
+ return exit_waterfall(ctx, &wctx, ret);
}
static void
break;
case nir_var_mem_global: {
LLVMValueRef address = get_src(ctx, instr->src[0]);
+ LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
unsigned natural_stride = type_scalar_size_bytes(deref->type);
unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, result_type) / 8;
+ bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
- LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
- if (stride != natural_stride) {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(result_type),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ if (stride != natural_stride || split_loads) {
+ if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
+ result_type = LLVMGetElementType(result_type);
+
+ LLVMTypeRef ptr_type = LLVMPointerType(result_type,
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0);
values[i] = LLVMBuildLoad(ctx->ac.builder,
ac_build_gep_ptr(&ctx->ac, address, offset), "");
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic);
}
return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components);
} else {
LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
return val;
}
}
visit_store_var(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
+ ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7002);
+ }
+
nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
nir_variable *var = nir_deref_instr_get_variable(deref);
ctx->abi->store_tcs_outputs(ctx->abi, var,
vertex_index, indir_index,
const_index, src, writemask);
- return;
+ break;
}
for (unsigned chan = 0; chan < 8; chan++) {
unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
unsigned natural_stride = type_scalar_size_bytes(deref->type);
unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(val)) / 8;
+ bool split_stores = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 &&
- stride == natural_stride) {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ stride == natural_stride && !split_stores) {
+ LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
val = LLVMBuildBitCast(ctx->ac.builder, val,
LLVMGetElementType(LLVMTypeOf(address)), "");
- LLVMBuildStore(ctx->ac.builder, val, address);
+ LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, val, address);
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
} else {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(LLVMTypeOf(val)),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ LLVMTypeRef val_type = LLVMTypeOf(val);
+ if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
+ val_type = LLVMGetElementType(val_type);
+
+ LLVMTypeRef ptr_type = LLVMPointerType(val_type,
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
for (unsigned chan = 0; chan < 4; chan++) {
if (!(writemask & (1 << chan)))
chan);
src = LLVMBuildBitCast(ctx->ac.builder, src,
LLVMGetElementType(LLVMTypeOf(ptr)), "");
- LLVMBuildStore(ctx->ac.builder, src, ptr);
+ LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, src, ptr);
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
}
}
break;
abort();
break;
}
+
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7002);
}
static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr,
+ LLVMValueRef dynamic_index,
enum ac_descriptor_type desc_type,
bool write)
{
instr->src[0].ssa->parent_instr->type == nir_instr_type_deref ?
nir_instr_as_deref(instr->src[0].ssa->parent_instr) : NULL;
- return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, true, write);
+ return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, dynamic_index, true, write);
}
static void get_image_coords(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr,
+ LLVMValueRef dynamic_desc_index,
struct ac_image_args *args,
enum glsl_sampler_dim dim,
bool is_array)
fmask_load_address[2],
sample_index,
get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
- AC_DESC_FMASK, &instr->instr, true, false));
+ AC_DESC_FMASK, &instr->instr, dynamic_desc_index, true, false));
}
if (count == 1 && !gfx9_1d) {
if (instr->src[1].ssa->num_components)
static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr,
+ LLVMValueRef dynamic_index,
bool write, bool atomic)
{
- LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, write);
+ LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, write);
if (ctx->ac.chip_class == GFX9 && LLVM_VERSION_MAJOR < 9 && atomic) {
LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
return rsrc;
}
+/* Waterfall-loop entry for an image access: derives the descriptor index
+ * from the image deref (when src[0] is a deref; NULL deref for bindless)
+ * and loops over it only when the access is marked ACCESS_NON_UNIFORM. */
+static LLVMValueRef enter_waterfall_image(struct ac_nir_context *ctx,
+ struct waterfall_context *wctx,
+ const nir_intrinsic_instr *instr)
+{
+ nir_deref_instr *deref_instr = NULL;
+
+ if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref)
+ deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+
+ LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true);
+ return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
+}
+
static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr,
bool bindless)
LLVMValueRef res;
enum glsl_sampler_dim dim;
- enum gl_access_qualifier access;
+ enum gl_access_qualifier access = nir_intrinsic_access(instr);
bool is_array;
if (bindless) {
dim = nir_intrinsic_image_dim(instr);
- access = nir_intrinsic_access(instr);
is_array = nir_intrinsic_image_array(instr);
} else {
const nir_deref_instr *image_deref = get_image_deref(instr);
const struct glsl_type *type = image_deref->type;
const nir_variable *var = nir_deref_instr_get_variable(image_deref);
dim = glsl_get_sampler_dim(type);
- access = var->data.access;
+ access |= var->data.access;
is_array = glsl_sampler_type_is_array(type);
}
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+
struct ac_image_args args = {};
args.cache_policy = get_cache_policy(ctx, access, false, false);
unsigned num_channels = util_last_bit(mask);
LLVMValueRef rsrc, vindex;
- rsrc = get_image_buffer_descriptor(ctx, instr, false, false);
+ rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, false, false);
vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
ctx->ac.i32_0, "");
+ assert(instr->dest.is_ssa);
bool can_speculate = access & ACCESS_CAN_REORDER;
res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
ctx->ac.i32_0, num_channels,
args.cache_policy,
- can_speculate);
+ can_speculate,
+ instr->dest.ssa.bit_size == 16);
res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
res = ac_to_integer(&ctx->ac, res);
} else {
- args.opcode = ac_image_load;
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
- get_image_coords(ctx, instr, &args, dim, is_array);
+ bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
+
+ args.opcode = level_zero ? ac_image_load : ac_image_load_mip;
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
+ get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+ if (!level_zero)
+ args.lod = get_src(ctx, instr->src[3]);
args.dmask = 15;
args.attributes = AC_FUNC_ATTR_READONLY;
+ assert(instr->dest.is_ssa);
+ args.d16 = instr->dest.ssa.bit_size == 16;
+
res = ac_build_image_opcode(&ctx->ac, &args);
}
- return res;
+ return exit_waterfall(ctx, &wctx, res);
}
static void visit_image_store(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr,
+ const nir_intrinsic_instr *instr,
bool bindless)
{
-
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
+ ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7003);
+ }
enum glsl_sampler_dim dim;
- enum gl_access_qualifier access;
+ enum gl_access_qualifier access = nir_intrinsic_access(instr);
bool is_array;
+
if (bindless) {
dim = nir_intrinsic_image_dim(instr);
- access = nir_intrinsic_access(instr);
is_array = nir_intrinsic_image_array(instr);
} else {
const nir_deref_instr *image_deref = get_image_deref(instr);
const struct glsl_type *type = image_deref->type;
const nir_variable *var = nir_deref_instr_get_variable(image_deref);
dim = glsl_get_sampler_dim(type);
- access = var->data.access;
+ access |= var->data.access;
is_array = glsl_sampler_type_is_array(type);
}
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+
bool writeonly_memory = access & ACCESS_NON_READABLE;
struct ac_image_args args = {};
args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
if (dim == GLSL_SAMPLER_DIM_BUF) {
- LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true, false);
+ LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, false);
LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
unsigned src_channels = ac_get_llvm_num_components(src);
LLVMValueRef vindex;
ctx->ac.i32_0, "");
ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
- ctx->ac.i32_0, src_channels,
- args.cache_policy);
+ ctx->ac.i32_0, args.cache_policy);
} else {
- args.opcode = ac_image_store;
+ bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
+
+ args.opcode = level_zero ? ac_image_store : ac_image_store_mip;
args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
- get_image_coords(ctx, instr, &args, dim, is_array);
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
+ get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+ if (!level_zero)
+ args.lod = get_src(ctx, instr->src[4]);
args.dmask = 15;
+ args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;
ac_build_image_opcode(&ctx->ac, &args);
}
+ exit_waterfall(ctx, &wctx, NULL);
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7003);
}
static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- bool bindless)
+ const nir_intrinsic_instr *instr,
+ bool bindless)
{
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
+ ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7004);
+ }
+
LLVMValueRef params[7];
int param_count = 0;
is_array = glsl_sampler_type_is_array(type);
}
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+
switch (instr->intrinsic) {
case nir_intrinsic_bindless_image_atomic_add:
case nir_intrinsic_image_deref_atomic_add:
case nir_intrinsic_image_deref_atomic_inc_wrap: {
atomic_name = "inc";
atomic_subop = ac_atomic_inc_wrap;
- /* ATOMIC_INC instruction does:
- * value = (value + 1) % (data + 1)
- * but we want:
- * value = (value + 1) % data
- * So replace 'data' by 'data - 1'.
- */
- ctx->ssa_defs[instr->src[3].ssa->index] =
- LLVMBuildSub(ctx->ac.builder,
- ctx->ssa_defs[instr->src[3].ssa->index],
- ctx->ac.i32_1, "");
break;
}
case nir_intrinsic_bindless_image_atomic_dec_wrap:
params[param_count++] = get_src(ctx, instr->src[4]);
params[param_count++] = get_src(ctx, instr->src[3]);
+ LLVMValueRef result;
if (dim == GLSL_SAMPLER_DIM_BUF) {
- params[param_count++] = get_image_buffer_descriptor(ctx, instr, true, true);
+ params[param_count++] = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, true);
params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
ctx->ac.i32_0, ""); /* vindex */
params[param_count++] = ctx->ac.i32_0; /* voffset */
}
assert(length < sizeof(intrinsic_name));
- return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32,
- params, param_count, 0);
+ result = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32,
+ params, param_count, 0);
} else {
struct ac_image_args args = {};
args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;
args.data[0] = params[0];
if (cmpswap)
args.data[1] = params[1];
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
- get_image_coords(ctx, instr, &args, dim, is_array);
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
+ get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
- return ac_build_image_opcode(&ctx->ac, &args);
+ result = ac_build_image_opcode(&ctx->ac, &args);
}
+
+ result = exit_waterfall(ctx, &wctx, result);
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7004);
+ return result;
}
static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
+ nir_intrinsic_instr *instr)
{
- LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
+ /* Handle a possibly divergent image index: enter_waterfall_image()
+ * splits execution into per-uniform-index iterations before the
+ * descriptor is loaded. */
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
+ LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
- return ac_build_image_get_sample_count(&ctx->ac, rsrc);
+ LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc);
+
+ /* Close the waterfall loop and merge the per-iteration results. */
+ return exit_waterfall(ctx, &wctx, ret);
}
static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
is_array = glsl_sampler_type_is_array(type);
}
- if (dim == GLSL_SAMPLER_DIM_BUF)
- return get_buffer_size(ctx, get_image_descriptor(ctx, instr, AC_DESC_BUFFER, false), true);
+ struct waterfall_context wctx;
+ LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
- struct ac_image_args args = { 0 };
+ if (dim == GLSL_SAMPLER_DIM_BUF) {
+ res = get_buffer_size(ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true);
+ } else {
- args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
- args.dmask = 0xf;
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
- args.opcode = ac_image_get_resinfo;
- args.lod = ctx->ac.i32_0;
- args.attributes = AC_FUNC_ATTR_READNONE;
+ struct ac_image_args args = { 0 };
- res = ac_build_image_opcode(&ctx->ac, &args);
+ args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+ args.dmask = 0xf;
+ args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
+ args.opcode = ac_image_get_resinfo;
+ args.lod = ctx->ac.i32_0;
+ args.attributes = AC_FUNC_ATTR_READNONE;
- LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+ res = ac_build_image_opcode(&ctx->ac, &args);
- if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
- LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
- LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
- z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
- res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
- }
- if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
- LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
- res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
- ctx->ac.i32_1, "");
+ LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+ if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
+ LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
+ LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
+ z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
+ res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
+ }
+
+ if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
+ LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
+ res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
+ ctx->ac.i32_1, "");
+ }
}
- return res;
+ return exit_waterfall(ctx, &wctx, res);
}
static void emit_membar(struct ac_llvm_context *ac,
case nir_intrinsic_group_memory_barrier:
wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
break;
- case nir_intrinsic_memory_barrier_atomic_counter:
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_image:
wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
cond = ctx->ac.i1false;
}
- ctx->abi->emit_kill(ctx->abi, cond);
+ ac_build_kill_if_false(&ctx->ac, cond);
+}
+
+/* Lower nir_intrinsic_demote / nir_intrinsic_demote_if.
+ *
+ * Unlike a plain discard, a demoted lane must keep running as a helper
+ * invocation so derivatives in its quad stay valid: the kill below is
+ * wrapped in a WQM vote, and the demote is additionally recorded in
+ * ctx->ac.postponed_kill (true = still live) so that side effects
+ * elsewhere (stores, atomics) can be suppressed for demoted lanes.
+ */
+static void emit_demote(struct ac_nir_context *ctx,
+ const nir_intrinsic_instr *instr)
+{
+ LLVMValueRef cond;
+
+ if (instr->intrinsic == nir_intrinsic_demote_if) {
+ /* cond is the "keep this lane" condition: true when the NIR
+ * demote condition evaluates to false. */
+ cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
+ get_src(ctx, instr->src[0]),
+ ctx->ac.i32_0, "");
+ } else {
+ assert(instr->intrinsic == nir_intrinsic_demote);
+ cond = ctx->ac.i1false;
+ }
+
+ /* Kill immediately while maintaining WQM. */
+ ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
+
+ /* AND the demote into the postponed-kill mask. NOTE(review): this
+ * dereferences ctx->ac.postponed_kill unconditionally — it is
+ * allocated when nir->info.fs.uses_demote is set, which should hold
+ * whenever a demote intrinsic appears; confirm for all callers. */
+ LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
+ mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");
+ LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);
+}
static LLVMValueRef
const nir_intrinsic_instr *instr,
LLVMValueRef ptr, int src_idx)
{
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
+ ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7005);
+ }
+
LLVMValueRef result;
LLVMValueRef src = get_src(ctx, instr->src[src_idx]);
const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
+ if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref) {
+ nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ if (deref->mode == nir_var_mem_global) {
+ /* use "singlethread" sync scope to implement relaxed ordering */
+ sync_scope = LLVM_VERSION_MAJOR >= 9 ? "singlethread-one-as" : "singlethread";
+
+ LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(src), LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type , "");
+ }
+ }
+
if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap ||
instr->intrinsic == nir_intrinsic_deref_atomic_comp_swap) {
LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
case nir_intrinsic_deref_atomic_exchange:
op = LLVMAtomicRMWBinOpXchg;
break;
+#if LLVM_VERSION_MAJOR >= 10
+ case nir_intrinsic_shared_atomic_fadd:
+ case nir_intrinsic_deref_atomic_fadd:
+ op = LLVMAtomicRMWBinOpFAdd;
+ break;
+#endif
default:
return NULL;
}
- result = ac_build_atomic_rmw(&ctx->ac, op, ptr, ac_to_integer(&ctx->ac, src), sync_scope);
+ LLVMValueRef val;
+
+ if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd ||
+ instr->intrinsic == nir_intrinsic_deref_atomic_fadd) {
+ val = ac_to_float(&ctx->ac, src);
+ } else {
+ val = ac_to_integer(&ctx->ac, src);
+ }
+
+ result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope);
}
+
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7005);
return result;
}
return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
}
+/* Implements nir_intrinsic_load_barycentric_model: returns the
+ * three-component pull-model interpolation argument
+ * (ctx->args->pull_model) bitcast to v3i32 for SSA bookkeeping. */
+static LLVMValueRef barycentric_model(struct ac_nir_context *ctx)
+{
+ return LLVMBuildBitCast(ctx->ac.builder,
+ ac_get_arg(&ctx->ac, ctx->args->pull_model),
+ ctx->ac.v3i32, "");
+}
+
static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx,
LLVMValueRef interp_param,
unsigned index, unsigned comp_start,
unsigned bitsize)
{
LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
+ LLVMValueRef interp_param_f;
- interp_param = LLVMBuildBitCast(ctx->ac.builder,
+ interp_param_f = LLVMBuildBitCast(ctx->ac.builder,
interp_param, ctx->ac.v2f32, "");
LLVMValueRef i = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
+ ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");
LLVMValueRef j = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
+ ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");
+
+ /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */
+ if (ctx->verified_interp &&
+ !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {
+ LLVMValueRef args[2];
+ args[0] = i;
+ args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false);
+ LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
+ args, 2, AC_FUNC_ATTR_READNONE);
+ ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));
+ _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);
+ }
LLVMValueRef values[4];
assert(bitsize == 16 || bitsize == 32);
return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
}
-static LLVMValueRef load_flat_input(struct ac_nir_context *ctx,
- unsigned index, unsigned comp_start,
- unsigned num_components,
- unsigned bit_size)
+static LLVMValueRef load_input(struct ac_nir_context *ctx,
+ nir_intrinsic_instr *instr)
{
- LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
+ unsigned offset_idx = instr->intrinsic == nir_intrinsic_load_input ? 0 : 1;
+
+ /* We only lower inputs for fragment shaders ATM */
+ ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[offset_idx]);
+ assert(offset);
+ assert(offset[0].i32 == 0);
+
+ unsigned component = nir_intrinsic_component(instr);
+ unsigned index = nir_intrinsic_base(instr);
+ unsigned vertex_id = 2; /* P0 */
+
+ if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
+ nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);
+ switch (src0[0].i32) {
+ case 0:
+ vertex_id = 2;
+ break;
+ case 1:
+ vertex_id = 0;
+ break;
+ case 2:
+ vertex_id = 1;
+ break;
+ default:
+ unreachable("Invalid vertex index");
+ }
+ }
+
+ LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
LLVMValueRef values[8];
/* Each component of a 64-bit value takes up two GL-level channels. */
+ unsigned num_components = instr->dest.ssa.num_components;
+ unsigned bit_size = instr->dest.ssa.bit_size;
unsigned channels =
bit_size == 64 ? num_components * 2 : num_components;
for (unsigned chan = 0; chan < channels; chan++) {
- if (comp_start + chan > 4)
+ if (component + chan > 4)
attr_number = LLVMConstInt(ctx->ac.i32, index + 1, false);
- LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (comp_start + chan) % 4, false);
+ LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
values[chan] = ac_build_fs_interp_mov(&ctx->ac,
- LLVMConstInt(ctx->ac.i32, 2, false),
+ LLVMConstInt(ctx->ac.i32, vertex_id, false),
llvm_chan,
attr_number,
ac_get_arg(&ctx->ac, ctx->args->prim_mask));
case nir_intrinsic_load_helper_invocation:
result = ac_build_load_helper_invocation(&ctx->ac);
break;
+ case nir_intrinsic_is_helper_invocation:
+ result = ac_build_is_helper_invocation(&ctx->ac);
+ break;
case nir_intrinsic_load_color0:
result = ctx->abi->color0;
break;
result = visit_image_size(ctx, instr, false);
break;
case nir_intrinsic_shader_clock:
- result = ac_build_shader_clock(&ctx->ac);
+ result = ac_build_shader_clock(&ctx->ac,
+ nir_intrinsic_memory_scope(instr));
break;
case nir_intrinsic_discard:
case nir_intrinsic_discard_if:
emit_discard(ctx, instr);
break;
+ case nir_intrinsic_demote:
+ case nir_intrinsic_demote_if:
+ emit_demote(ctx, instr);
+ break;
case nir_intrinsic_memory_barrier:
case nir_intrinsic_group_memory_barrier:
- case nir_intrinsic_memory_barrier_atomic_counter:
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_image:
case nir_intrinsic_memory_barrier_shared:
emit_membar(&ctx->ac, instr);
break;
- case nir_intrinsic_barrier:
+ case nir_intrinsic_scoped_barrier: {
+ assert(!(nir_intrinsic_memory_semantics(instr) &
+ (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
+
+ nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
+
+ unsigned wait_flags = 0;
+ if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
+ wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
+ if (modes & nir_var_mem_shared)
+ wait_flags |= AC_WAIT_LGKM;
+
+ if (wait_flags)
+ ac_build_waitcnt(&ctx->ac, wait_flags);
+
+ if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
+ ac_emit_barrier(&ctx->ac, ctx->stage);
+ break;
+ }
+ case nir_intrinsic_memory_barrier_tcs_patch:
+ break;
+ case nir_intrinsic_control_barrier:
ac_emit_barrier(&ctx->ac, ctx->stage);
break;
case nir_intrinsic_shared_atomic_add:
case nir_intrinsic_shared_atomic_or:
case nir_intrinsic_shared_atomic_xor:
case nir_intrinsic_shared_atomic_exchange:
- case nir_intrinsic_shared_atomic_comp_swap: {
+ case nir_intrinsic_shared_atomic_comp_swap:
+ case nir_intrinsic_shared_atomic_fadd: {
LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
instr->src[1].ssa->bit_size);
result = visit_var_atomic(ctx, instr, ptr, 1);
case nir_intrinsic_deref_atomic_or:
case nir_intrinsic_deref_atomic_xor:
case nir_intrinsic_deref_atomic_exchange:
- case nir_intrinsic_deref_atomic_comp_swap: {
+ case nir_intrinsic_deref_atomic_comp_swap:
+ case nir_intrinsic_deref_atomic_fadd: {
LLVMValueRef ptr = get_src(ctx, instr->src[0]);
result = visit_var_atomic(ctx, instr, ptr, 1);
break;
case nir_intrinsic_load_barycentric_sample:
result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
break;
+ case nir_intrinsic_load_barycentric_model:
+ result = barycentric_model(ctx);
+ break;
case nir_intrinsic_load_barycentric_at_offset: {
LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
instr->dest.ssa.bit_size);
break;
}
- case nir_intrinsic_load_input: {
- /* We only lower inputs for fragment shaders ATM */
- ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[0]);
- assert(offset);
- assert(offset[0].i32 == 0);
-
- unsigned index = nir_intrinsic_base(instr);
- unsigned component = nir_intrinsic_component(instr);
- result = load_flat_input(ctx, index, component,
- instr->dest.ssa.num_components,
- instr->dest.ssa.bit_size);
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_input_vertex:
+ result = load_input(ctx, instr);
break;
- }
case nir_intrinsic_emit_vertex:
ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
break;
+ case nir_intrinsic_emit_vertex_with_counter: {
+ unsigned stream = nir_intrinsic_stream_id(instr);
+ LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
+ ctx->abi->emit_vertex_with_counter(ctx->abi, stream,
+ next_vertex,
+ ctx->abi->outputs);
+ break;
+ }
case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter:
ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
break;
case nir_intrinsic_load_tess_coord:
break;
}
case nir_intrinsic_shuffle:
- result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]));
+ if (ctx->ac.chip_class == GFX8 ||
+ ctx->ac.chip_class == GFX9 ||
+ (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
+ result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
+ get_src(ctx, instr->src[1]));
+ } else {
+ LLVMValueRef src = get_src(ctx, instr->src[0]);
+ LLVMValueRef index = get_src(ctx, instr->src[1]);
+ LLVMTypeRef type = LLVMTypeOf(src);
+ struct waterfall_context wctx;
+ LLVMValueRef index_val;
+
+ index_val = enter_waterfall(ctx, &wctx, index, true);
+
+ src = LLVMBuildZExt(ctx->ac.builder, src,
+ ctx->ac.i32, "");
+
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
+ ctx->ac.i32,
+ (LLVMValueRef []) { src, index_val }, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
+
+ result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
+
+ result = exit_waterfall(ctx, &wctx, result);
+ }
break;
case nir_intrinsic_reduce:
result = ac_build_reduce(&ctx->ac,
break;
}
case nir_intrinsic_load_constant: {
+ unsigned base = nir_intrinsic_base(instr);
+ unsigned range = nir_intrinsic_range(instr);
+
LLVMValueRef offset = get_src(ctx, instr->src[0]);
- LLVMValueRef base = LLVMConstInt(ctx->ac.i32,
- nir_intrinsic_base(instr),
- false);
- offset = LLVMBuildAdd(ctx->ac.builder, offset, base, "");
+ offset = LLVMBuildAdd(ctx->ac.builder, offset,
+ LLVMConstInt(ctx->ac.i32, base, false), "");
+
+ /* Clamp the offset to avoid out-of-bound access because global
+ * instructions can't handle them.
+ */
+ LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false);
+ LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
+ offset, size, "");
+ offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, "");
+
LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data,
offset);
LLVMTypeRef comp_type =
return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");
}
-static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
- nir_deref_instr *deref_instr,
- enum ac_descriptor_type desc_type,
- const nir_instr *instr,
- bool image, bool write)
+/* Decomposed addressing info for a sampler/image descriptor, produced by
+ * get_sampler_desc_internal() and forwarded to
+ * ctx->abi->load_sampler_desc(). */
+struct sampler_desc_address {
+ unsigned descriptor_set;
+ unsigned base_index; /* binding in vulkan */
+ unsigned constant_index;
+ LLVMValueRef dynamic_index; /* possibly divergent runtime index, or NULL */
+ bool image;
+ bool bindless;
+};
+
+static struct sampler_desc_address
+get_sampler_desc_internal(struct ac_nir_context *ctx,
+ nir_deref_instr *deref_instr,
+ const nir_instr *instr,
+ bool image)
{
LLVMValueRef index = NULL;
unsigned constant_index = 0;
} else
base_index = deref_instr->var->data.binding;
}
+ return (struct sampler_desc_address) {
+ .descriptor_set = descriptor_set,
+ .base_index = base_index,
+ .constant_index = constant_index,
+ .dynamic_index = index,
+ .image = image,
+ .bindless = bindless,
+ };
+}
+/* Extract any possibly divergent index into a separate value that can be fed
+ * into get_sampler_desc with the same arguments. Callers run this value
+ * through a waterfall loop (enter_waterfall) when the index is
+ * non-uniform, then pass the uniformized result back to
+ * get_sampler_desc(). */
+static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx,
+ nir_deref_instr *deref_instr,
+ const nir_instr *instr,
+ bool image)
+{
+ struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
+ return addr.dynamic_index;
+}
+
+/* Load a descriptor of the given type. 'index' is the dynamic index
+ * previously obtained from get_sampler_desc_index() with the same
+ * deref/instr arguments (possibly uniformized by a waterfall loop in
+ * between); the remaining addressing info is recomputed here via
+ * get_sampler_desc_internal(). */
+static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
+ nir_deref_instr *deref_instr,
+ enum ac_descriptor_type desc_type,
+ const nir_instr *instr,
+ LLVMValueRef index,
+ bool image, bool write)
+{
+ struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
 return ctx->abi->load_sampler_desc(ctx->abi,
- descriptor_set,
- base_index,
- constant_index, index,
- desc_type, image, write, bindless);
+ addr.descriptor_set,
+ addr.base_index,
+ addr.constant_index, index,
+ desc_type, addr.image, write, addr.bindless);
}
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
static void tex_fetch_ptrs(struct ac_nir_context *ctx,
nir_tex_instr *instr,
+ struct waterfall_context *wctx,
LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
LLVMValueRef *fmask_ptr)
{
}
}
+ LLVMValueRef texture_dynamic_index = get_sampler_desc_index(ctx, texture_deref_instr,
+ &instr->instr, false);
if (!sampler_deref_instr)
sampler_deref_instr = texture_deref_instr;
+ LLVMValueRef sampler_dynamic_index = get_sampler_desc_index(ctx, sampler_deref_instr,
+ &instr->instr, false);
+ if (instr->texture_non_uniform)
+ texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true);
+
+ if (instr->sampler_non_uniform)
+ sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true);
+
enum ac_descriptor_type main_descriptor = instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
if (plane >= 0) {
main_descriptor = AC_DESC_PLANE_0 + plane;
}
- *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr, false, false);
+ if (instr->op == nir_texop_fragment_mask_fetch) {
+ /* The fragment mask is fetched from the compressed
+ * multisampled surface.
+ */
+ main_descriptor = AC_DESC_FMASK;
+ }
+
+ *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr,
+ texture_dynamic_index, false, false);
if (samp_ptr) {
- *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr, false, false);
+ *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr,
+ sampler_dynamic_index, false, false);
if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
}
if (fmask_ptr && (instr->op == nir_texop_txf_ms ||
instr->op == nir_texop_samples_identical))
- *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, &instr->instr, false, false);
+ *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK,
+ &instr->instr, texture_dynamic_index, false, false);
}
static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
LLVMValueRef ddx = NULL, ddy = NULL;
unsigned offset_src = 0;
+ struct waterfall_context wctx[2] = {{{0}}};
- tex_fetch_ptrs(ctx, instr, &args.resource, &args.sampler, &fmask_ptr);
+ tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr);
for (unsigned i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
offset_src = i;
break;
case nir_tex_src_bias:
- if (instr->op == nir_texop_txb)
- args.bias = get_src(ctx, instr->src[i].src);
+ args.bias = get_src(ctx, instr->src[i].src);
break;
case nir_tex_src_lod: {
if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
case nir_tex_src_ddy:
ddy = get_src(ctx, instr->src[i].src);
break;
+ case nir_tex_src_min_lod:
+ args.min_lod = get_src(ctx, instr->src[i].src);
+ break;
case nir_tex_src_texture_offset:
case nir_tex_src_sampler_offset:
case nir_tex_src_plane:
if (instr->op == nir_texop_texture_samples) {
LLVMValueRef res, samples, is_msaa;
+ LLVMValueRef default_sample;
+
res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
samples = LLVMBuildExtractElement(ctx->ac.builder, res,
LLVMConstInt(ctx->ac.i32, 3, false), "");
LLVMConstInt(ctx->ac.i32, 0xf, false), "");
samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
samples, "");
+
+ if (ctx->abi->robust_buffer_access) {
+ LLVMValueRef dword1, is_null_descriptor;
+
+ /* Extract the second dword of the descriptor, if it's
+ * all zero, then it's a null descriptor.
+ */
+ dword1 = LLVMBuildExtractElement(ctx->ac.builder, res,
+ LLVMConstInt(ctx->ac.i32, 1, false), "");
+ is_null_descriptor =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,
+ LLVMConstInt(ctx->ac.i32, 0, false), "");
+ default_sample =
+ LLVMBuildSelect(ctx->ac.builder, is_null_descriptor,
+ ctx->ac.i32_0, ctx->ac.i32_1, "");
+ } else {
+ default_sample = ctx->ac.i32_1;
+ }
+
samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
- ctx->ac.i32_1, "");
+ default_sample, "");
result = samples;
goto write_result;
}
instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
instr->is_array &&
- instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
+ instr->op != nir_texop_txf &&
+ instr->op != nir_texop_txf_ms &&
+ instr->op != nir_texop_fragment_fetch &&
+ instr->op != nir_texop_fragment_mask_fetch) {
args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
}
}
/* Pack sample index */
- if (instr->op == nir_texop_txf_ms && sample_index)
+ if (sample_index && (instr->op == nir_texop_txf_ms ||
+ instr->op == nir_texop_fragment_fetch))
args.coords[instr->coord_components] = sample_index;
if (instr->op == nir_texop_samples_identical) {
if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ||
instr->sampler_dim == GLSL_SAMPLER_DIM_MS) &&
- instr->op != nir_texop_txs) {
+ instr->op != nir_texop_txs &&
+ instr->op != nir_texop_fragment_fetch &&
+ instr->op != nir_texop_fragment_mask_fetch) {
unsigned sample_chan = instr->is_array ? 3 : 2;
args.coords[sample_chan] = adjust_sample_index_using_fmask(
&ctx->ac, args.coords[0], args.coords[1],
args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array);
args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
}
+
+ /* Adjust the number of coordinates because we only need (x,y) for 2D
+ * multisampled images and (x,y,layer) for 2D multisampled layered
+ * images or for multisampled input attachments.
+ */
+ if (instr->op == nir_texop_fragment_mask_fetch) {
+ if (args.dim == ac_image_2dmsaa) {
+ args.dim = ac_image_2d;
+ } else {
+ assert(args.dim == ac_image_2darraymsaa);
+ args.dim = ac_image_2darray;
+ }
+ }
+
+ assert(instr->dest.is_ssa);
+ args.d16 = instr->dest.ssa.bit_size == 16;
+
result = build_tex_intrinsic(ctx, instr, &args);
if (instr->op == nir_texop_query_levels)
if (result) {
assert(instr->dest.is_ssa);
result = ac_to_integer(&ctx->ac, result);
+
+ for (int i = ARRAY_SIZE(wctx); --i >= 0;) {
+ result = exit_waterfall(ctx, wctx + i, result);
+ }
+
ctx->ssa_defs[instr->dest.ssa.index] = result;
}
}
-
static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
{
LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
}
+/* Return true if 'def' feeds a store_deref, either directly or through a
+ * nir_op_vec4 (checked recursively), i.e. the value may reach a shader
+ * output. Such undefs must be kept as real undefs instead of being
+ * folded to zero — see visit_ssa_undef(). */
+static bool is_def_used_in_an_export(const nir_ssa_def* def) {
+ nir_foreach_use(use_src, def) {
+ if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
+ if (instr->intrinsic == nir_intrinsic_store_deref)
+ return true;
+ } else if (use_src->parent_instr->type == nir_instr_type_alu) {
+ nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
+ if (instr->op == nir_op_vec4 &&
+ is_def_used_in_an_export(&instr->dest.dest.ssa)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
static void visit_ssa_undef(struct ac_nir_context *ctx,
 const nir_ssa_undef_instr *instr)
{
 unsigned num_components = instr->def.num_components;
 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
- LLVMValueRef undef;
- if (num_components == 1)
- undef = LLVMGetUndef(type);
- else {
- undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ /* Keep a real undef when the ABI does not request zeroing, or when the
+ * value may reach a store (see is_def_used_in_an_export). */
+ if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {
+ LLVMValueRef undef;
+
+ if (num_components == 1)
+ undef = LLVMGetUndef(type);
+ else {
+ undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ }
+ ctx->ssa_defs[instr->def.index] = undef;
+ } else {
+ LLVMValueRef zero = LLVMConstInt(type, 0, false);
+ if (num_components > 1) {
+ /* Replicate the single zero (value_stride 0) into a vector of
+ * the def's actual width. The previous hard-coded count of 4
+ * produced a wrongly-sized vector for 2/3/8-component defs,
+ * mismatching LLVMVectorType(type, num_components) above. */
+ zero = ac_build_gather_values_extended(
+ &ctx->ac, &zero, num_components, 0, false, false);
+ }
+ ctx->ssa_defs[instr->def.index] = zero;
 }
- ctx->ssa_defs[instr->def.index] = undef;
}
static void visit_jump(struct ac_llvm_context *ctx,
{
int i, j;
ctx->num_locals = 0;
- nir_foreach_variable(variable, &func->impl->locals) {
+ nir_foreach_function_temp_variable(variable, func->impl) {
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
variable->data.driver_location = ctx->num_locals * 4;
variable->data.location_frac = 0;
ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
- nir_foreach_variable(variable, &nir->outputs)
+ nir_foreach_shader_out_variable(variable, nir)
ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
ctx.stage);
ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ctx.verified_interp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
func = (struct nir_function *)exec_list_get_head(&nir->functions);
nir_index_ssa_defs(func->impl);
if (gl_shader_stage_is_compute(nir->info.stage))
setup_shared(&ctx, nir);
+ if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_demote) {
+ ctx.ac.postponed_kill = ac_build_alloca_undef(&ctx.ac, ac->i1, "");
+ /* true = don't kill. */
+ LLVMBuildStore(ctx.ac.builder, ctx.ac.i1true, ctx.ac.postponed_kill);
+ }
+
visit_cf_list(&ctx, &func->impl->body);
phi_post_pass(&ctx);
+ if (ctx.ac.postponed_kill)
+ ac_build_kill_if_false(&ctx.ac, LLVMBuildLoad(ctx.ac.builder,
+ ctx.ac.postponed_kill, ""));
+
if (!gl_shader_stage_is_compute(nir->info.stage))
ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS,
ctx.abi->outputs);
ralloc_free(ctx.defs);
ralloc_free(ctx.phis);
ralloc_free(ctx.vars);
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ralloc_free(ctx.verified_interp);
}
bool
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- if (intrin->intrinsic == nir_intrinsic_barrier) {
+ if (intrin->intrinsic == nir_intrinsic_control_barrier) {
/* If we find a barrier in nested control flow put this in the
* too hard basket. In GLSL this is not possible but it is in