struct hash_table *defs;
struct hash_table *phis;
struct hash_table *vars;
+ struct hash_table *verified_interp;
LLVMValueRef main_function;
LLVMBasicBlockRef continue_block;
LLVMIntPredicate pred, LLVMValueRef src0,
LLVMValueRef src1)
{
+ LLVMTypeRef src0_type = LLVMTypeOf(src0);
+ LLVMTypeRef src1_type = LLVMTypeOf(src1);
+
+ if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
+ src1 = LLVMBuildIntToPtr(ctx->builder, src1, src0_type, "");
+ } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
+ LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) {
+ src0 = LLVMBuildIntToPtr(ctx->builder, src0, src1_type, "");
+ }
+
LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
return LLVMBuildSelect(ctx->builder, result,
LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
LLVMTypeRef result_type,
LLVMValueRef src0)
{
- char name[64];
+ char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
};
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
- ac_get_elem_bits(ctx, result_type));
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
}
+static LLVMValueRef emit_intrin_1f_param_scalar(struct ac_llvm_context *ctx,
+ const char *intrin,
+ LLVMTypeRef result_type,
+ LLVMValueRef src0)
+{ /* Emit the one-operand float intrinsic `intrin`, one call per element.
+ *
+ * ctx:         LLVM context wrapper; its builder and i32 type are used.
+ * intrin:      intrinsic base name; ".<type>" is appended for each call.
+ * result_type: requested result type, scalar or vector.
+ * src0:        operand; each element is converted with ac_to_float().
+ *
+ * Returns the result of emit_intrin_1f_param() when result_type is not
+ * a vector, otherwise a vector of result_type assembled from the
+ * per-element intrinsic calls.
+ */
+ if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
+ return emit_intrin_1f_param(ctx, intrin, result_type, src0); /* non-vector result: a single call is enough */
+
+ LLVMTypeRef elem_type = LLVMGetElementType(result_type);
+ LLVMValueRef ret = LLVMGetUndef(result_type); /* filled in element by element below */
+
+ /* Scalarize the intrinsic, because vectors are not supported.
+ * Each iteration handles element i: extract it from src0, call the
+ * intrinsic with elem_type as the result type, and insert the value
+ * into ret at index i.
+ */
+ for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
+ char name[64], type[64];
+ LLVMValueRef params[] = {
+ ac_to_float(ctx, ac_llvm_extract_elem(ctx, src0, i)),
+ };
+
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); /* `type` becomes the name suffix used below */
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
+ assert(length < sizeof(name)); /* the composed name must fit in the buffer */
+ ret = LLVMBuildInsertElement(ctx->builder, ret,
+ ac_build_intrinsic(ctx, name, elem_type, params,
+ 1, AC_FUNC_ATTR_READNONE),
+ LLVMConstInt(ctx->i32, i, 0), "");
+ }
+ return ret;
+}
+
static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
const char *intrin,
LLVMTypeRef result_type,
LLVMValueRef src0, LLVMValueRef src1)
{
- char name[64];
+ char name[64], type[64]; /* Builds "<intrin>.<type>" and calls it on two float operands.
+ * `type` receives the suffix produced from the first operand's LLVM
+ * type; `name` receives the complete intrinsic name.
+ */
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
ac_to_float(ctx, src1),
};
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
- ac_get_elem_bits(ctx, result_type));
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); /* suffix now comes from the operand type instead of a fixed "f<bits>" */
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
}
LLVMTypeRef result_type,
LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
- char name[64];
+ char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
ac_to_float(ctx, src1),
ac_to_float(ctx, src2),
};
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
- ac_get_elem_bits(ctx, result_type));
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
}
LLVMTypeRef src1_type = LLVMTypeOf(src1);
LLVMTypeRef src2_type = LLVMTypeOf(src2);
- assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMFixedVectorTypeKind);
+ assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);
if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
unsigned num_components = instr->dest.dest.ssa.num_components;
unsigned src_components;
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
- bool saved_inexact = false;
-
- if (instr->exact)
- saved_inexact = ac_disable_inexact_math(ctx->ac.builder);
assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
switch (instr->op) {
result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_frcp:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
+ /* For doubles, we need precise division to pass GLCTS. */
+ if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
+ ac_get_type_size(def_type) == 8) {
+ result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1,
+ ac_to_float(&ctx->ac, src[0]), "");
+ } else {
+ result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rcp",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ }
+ if (ctx->abi->clamp_div_by_zero)
+ result = ac_build_fmin(&ctx->ac, result,
+ LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
break;
case nir_op_iand:
result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
case nir_op_feq32:
result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
break;
- case nir_op_fne32:
+ case nir_op_fneu32:
result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
break;
case nir_op_flt32:
case nir_op_frsq:
result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
ac_to_float_type(&ctx->ac, def_type), src[0]);
+ if (ctx->abi->clamp_div_by_zero)
+ result = ac_build_fmin(&ctx->ac, result,
+ LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
break;
case nir_op_frexp_exp:
src[0] = ac_to_float(&ctx->ac, src[0]);
case nir_op_ffma:
/* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
break;
case nir_op_ldexp:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
case nir_op_f2f16_rtz:
+ case nir_op_f2f16:
+ case nir_op_f2fmp:
src[0] = ac_to_float(&ctx->ac, src[0]);
- if (LLVMTypeOf(src[0]) == ctx->ac.f64)
- src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
- LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
- result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
- result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+
+ /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
+ * all f32->f16 conversions have to round towards zero, because both scalar
+ * and vec2 down-conversions have to round equally.
+ */
+ if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL ||
+ instr->op == nir_op_f2f16_rtz) {
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+
+ if (LLVMTypeOf(src[0]) == ctx->ac.f64)
+ src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
+
+ /* Fast path conversion. This only works if NIR is vectorized
+ * to vec2 16.
+ */
+ if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
+ LLVMValueRef args[] = {
+ ac_llvm_extract_elem(&ctx->ac, src[0], 0),
+ ac_llvm_extract_elem(&ctx->ac, src[0], 1),
+ };
+ result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
+ break;
+ }
+
+ assert(ac_get_llvm_num_components(src[0]) == 1);
+ LLVMValueRef param[2] = { src[0], LLVMGetUndef(ctx->ac.f32) };
+ result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+ } else {
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
+ result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ else
+ result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ }
break;
case nir_op_f2f16_rtne:
- case nir_op_f2f16:
case nir_op_f2f32:
case nir_op_f2f64:
src[0] = ac_to_float(&ctx->ac, src[0]);
break;
case nir_op_u2u8:
case nir_op_u2u16:
+ case nir_op_u2ump:
case nir_op_u2u32:
case nir_op_u2u64:
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
break;
case nir_op_i2i8:
case nir_op_i2i16:
+ case nir_op_i2imp:
case nir_op_i2i32:
case nir_op_i2i64:
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
break;
}
- case nir_op_fmin3:
- result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
- result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
- ac_to_float_type(&ctx->ac, def_type), result, src[2]);
- break;
- case nir_op_umin3:
- result = ac_build_umin(&ctx->ac, src[0], src[1]);
- result = ac_build_umin(&ctx->ac, result, src[2]);
- break;
- case nir_op_imin3:
- result = ac_build_imin(&ctx->ac, src[0], src[1]);
- result = ac_build_imin(&ctx->ac, result, src[2]);
- break;
- case nir_op_fmax3:
- result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
- result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
- ac_to_float_type(&ctx->ac, def_type), result, src[2]);
- break;
- case nir_op_umax3:
- result = ac_build_umax(&ctx->ac, src[0], src[1]);
- result = ac_build_umax(&ctx->ac, result, src[2]);
- break;
- case nir_op_imax3:
- result = ac_build_imax(&ctx->ac, src[0], src[1]);
- result = ac_build_imax(&ctx->ac, result, src[2]);
- break;
- case nir_op_fmed3: {
- src[0] = ac_to_float(&ctx->ac, src[0]);
- src[1] = ac_to_float(&ctx->ac, src[1]);
- src[2] = ac_to_float(&ctx->ac, src[2]);
- result = ac_build_fmed3(&ctx->ac, src[0], src[1], src[2],
- instr->dest.dest.ssa.bit_size);
- break;
- }
- case nir_op_imed3: {
- LLVMValueRef tmp1 = ac_build_imin(&ctx->ac, src[0], src[1]);
- LLVMValueRef tmp2 = ac_build_imax(&ctx->ac, src[0], src[1]);
- tmp2 = ac_build_imin(&ctx->ac, tmp2, src[2]);
- result = ac_build_imax(&ctx->ac, tmp1, tmp2);
- break;
- }
- case nir_op_umed3: {
- LLVMValueRef tmp1 = ac_build_umin(&ctx->ac, src[0], src[1]);
- LLVMValueRef tmp2 = ac_build_umax(&ctx->ac, src[0], src[1]);
- tmp2 = ac_build_umin(&ctx->ac, tmp2, src[2]);
- result = ac_build_umax(&ctx->ac, tmp1, tmp2);
- break;
- }
-
default:
fprintf(stderr, "Unknown NIR alu instr: ");
nir_print_instr(&instr->instr, stderr);
result = ac_to_integer_or_pointer(&ctx->ac, result);
ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
}
-
- if (instr->exact)
- ac_restore_inexact_math(ctx->ac.builder, saved_inexact);
}
static void visit_load_const(struct ac_nir_context *ctx,
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+ assert(instr->dest.is_ssa);
return ac_build_buffer_load_format(&ctx->ac,
args->resource,
args->coords[0],
ctx->ac.i32_0,
util_last_bit(mask),
- 0, true);
+ 0, true,
+ instr->dest.ssa.bit_size == 16);
}
args->opcode = ac_image_sample;
break;
case nir_texop_tg4:
args->opcode = ac_image_gather4;
- args->level_zero = true;
+ if (!args->lod && !args->bias)
+ args->level_zero = true;
break;
case nir_texop_lod:
args->opcode = ac_image_get_lod;
if (instr->dest.ssa.bit_size == 8) {
unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
- LLVMTypeRef vec_type = LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords);
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
LLVMValueRef params[3];
if (load_dwords > 1) {
- LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), "");
+ LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
} else {
res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
if (instr->dest.ssa.num_components > 1)
- res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), instr->dest.ssa.num_components), "");
+ res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
return res;
} else if (instr->dest.ssa.bit_size == 16) {
unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
- LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
}
if (access & ACCESS_STREAM_CACHE_POLICY)
- cache_policy |= ac_slc;
+ cache_policy |= ac_slc | ac_glc;
return cache_policy;
}
LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
LLVMTypeRef src_component_type;
- if (LLVMGetTypeKind(dest_type) == LLVMFixedVectorTypeKind)
+ if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
src_component_type = LLVMGetElementType(dest_type);
else
src_component_type = dest_type;
switch (mode) {
case nir_var_shader_in:
+ /* TODO: remove this after RADV switches to lowered IO */
if (ctx->stage == MESA_SHADER_TESS_CTRL ||
ctx->stage == MESA_SHADER_TESS_EVAL) {
return load_tess_varyings(ctx, instr, true);
}
break;
case nir_var_shader_out:
+ /* TODO: remove this after RADV switches to lowered IO */
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
return load_tess_varyings(ctx, instr, false);
}
bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
if (stride != natural_stride || split_loads) {
- if (LLVMGetTypeKind(result_type) == LLVMFixedVectorTypeKind)
+ if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
result_type = LLVMGetElementType(result_type);
LLVMTypeRef ptr_type = LLVMPointerType(result_type,
LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0);
values[i] = LLVMBuildLoad(ctx->ac.builder,
ac_build_gep_ptr(&ctx->ac, address, offset), "");
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic);
}
return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components);
} else {
LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
return val;
}
}
switch (deref->mode) {
case nir_var_shader_out:
-
+ /* TODO: remove this after RADV switches to lowered IO */
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
LLVMValueRef vertex_index = NULL;
LLVMValueRef indir_index = NULL;
ctx->abi->store_tcs_outputs(ctx->abi, var,
vertex_index, indir_index,
- const_index, src, writemask);
+ const_index, src, writemask,
+ var->data.location_frac,
+ var->data.driver_location);
break;
}
val = LLVMBuildBitCast(ctx->ac.builder, val,
LLVMGetElementType(LLVMTypeOf(address)), "");
- LLVMBuildStore(ctx->ac.builder, val, address);
+ LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, val, address);
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
} else {
LLVMTypeRef val_type = LLVMTypeOf(val);
- if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMFixedVectorTypeKind)
+ if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
val_type = LLVMGetElementType(val_type);
LLVMTypeRef ptr_type = LLVMPointerType(val_type,
chan);
src = LLVMBuildBitCast(ctx->ac.builder, src,
LLVMGetElementType(LLVMTypeOf(ptr)), "");
- LLVMBuildStore(ctx->ac.builder, src, ptr);
+ LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, src, ptr);
+
+ if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
+ LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
}
}
break;
ac_build_endif(&ctx->ac, 7002);
}
+static void
+visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
+{ /* Emit LLVM IR for a lowered output store (store_output and
+ * store_per_vertex_output are both dispatched here).
+ *
+ * In the tessellation control stage the store is forwarded to
+ * ctx->abi->store_tcs_outputs(); in every other stage each channel
+ * enabled in the write mask is stored to ctx->abi->outputs[].
+ * 64-bit sources are first reinterpreted as twice as many 32-bit
+ * floats; any other non-32-bit element size hits unreachable().
+ *
+ * NOTE(review): the conditional opened for postponed_kill is only
+ * closed on the fall-through path at the end; the tessellation
+ * control `return` leaves before ac_build_endif(). Presumably
+ * postponed_kill is never set in that stage -- confirm.
+ */
+ if (ctx->ac.postponed_kill) {
+ LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
+ ctx->ac.postponed_kill, "");
+ ac_build_ifcc(&ctx->ac, cond, 7002); /* closed by the ac_build_endif(7002) at the end */
+ }
+
+ unsigned base = nir_intrinsic_base(instr);
+ unsigned writemask = nir_intrinsic_write_mask(instr);
+ unsigned component = nir_intrinsic_component(instr);
+ LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); /* src[0] is the value being stored */
+ nir_src offset = *nir_get_io_offset_src(instr);
+ LLVMValueRef indir_index = NULL; /* stays NULL when the offset is constant */
+
+ if (nir_src_is_const(offset))
+ assert(nir_src_as_uint(offset) == 0); /* a constant offset is expected to be zero here */
+ else
+ indir_index = get_src(ctx, offset);
+
+ switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
+ case 32:
+ break;
+ case 64:
+ writemask = widen_mask(writemask, 2); /* presumably two mask bits per 64-bit component; widen_mask() is defined elsewhere */
+ src = LLVMBuildBitCast(ctx->ac.builder, src,
+ LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
+ "");
+ break;
+ default:
+ unreachable("unhandled store_output bit size");
+ return;
+ }
+
+ writemask <<= component; /* mask bits now index channels from the start of the slot */
+
+ if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
+ LLVMValueRef vertex_index =
+ vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
+
+ ctx->abi->store_tcs_outputs(ctx->abi, NULL, /* no nir_variable is passed on this path */
+ vertex_index, indir_index,
+ 0, src, writemask,
+ component, base * 4);
+ return;
+ }
+
+ /* No indirect indexing is allowed after this point. */
+ assert(!indir_index);
+
+ for (unsigned chan = 0; chan < 8; chan++) { /* 8 channels: presumably up to two vec4 slots for 64-bit data -- confirm */
+ if (!(writemask & (1 << chan)))
+ continue;
+
+ LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); /* undo the shift applied to the mask */
+ LLVMBuildStore(ctx->ac.builder, value,
+ ctx->abi->outputs[base * 4 + chan]);
+ }
+
+ if (ctx->ac.postponed_kill)
+ ac_build_endif(&ctx->ac, 7002);
+}
+
static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
{
switch (dim) {
LLVMValueRef res;
enum glsl_sampler_dim dim;
- enum gl_access_qualifier access;
+ enum gl_access_qualifier access = nir_intrinsic_access(instr);
bool is_array;
if (bindless) {
dim = nir_intrinsic_image_dim(instr);
- access = nir_intrinsic_access(instr);
is_array = nir_intrinsic_image_array(instr);
} else {
const nir_deref_instr *image_deref = get_image_deref(instr);
const struct glsl_type *type = image_deref->type;
const nir_variable *var = nir_deref_instr_get_variable(image_deref);
dim = glsl_get_sampler_dim(type);
- access = var->data.access;
+ access |= var->data.access;
is_array = glsl_sampler_type_is_array(type);
}
vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
ctx->ac.i32_0, "");
+ assert(instr->dest.is_ssa);
bool can_speculate = access & ACCESS_CAN_REORDER;
res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
ctx->ac.i32_0, num_channels,
args.cache_policy,
- can_speculate);
+ can_speculate,
+ instr->dest.ssa.bit_size == 16);
res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
args.dmask = 15;
args.attributes = AC_FUNC_ATTR_READONLY;
+ assert(instr->dest.is_ssa);
+ args.d16 = instr->dest.ssa.bit_size == 16;
+
res = ac_build_image_opcode(&ctx->ac, &args);
}
return exit_waterfall(ctx, &wctx, res);
}
enum glsl_sampler_dim dim;
- enum gl_access_qualifier access;
+ enum gl_access_qualifier access = nir_intrinsic_access(instr);
bool is_array;
if (bindless) {
dim = nir_intrinsic_image_dim(instr);
- access = nir_intrinsic_access(instr);
is_array = nir_intrinsic_image_array(instr);
} else {
const nir_deref_instr *image_deref = get_image_deref(instr);
const struct glsl_type *type = image_deref->type;
const nir_variable *var = nir_deref_instr_get_variable(image_deref);
dim = glsl_get_sampler_dim(type);
- access = var->data.access;
+ access |= var->data.access;
is_array = glsl_sampler_type_is_array(type);
}
ctx->ac.i32_0, "");
ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
- ctx->ac.i32_0, src_channels,
- args.cache_policy);
+ ctx->ac.i32_0, args.cache_policy);
} else {
bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
if (!level_zero)
args.lod = get_src(ctx, instr->src[4]);
args.dmask = 15;
+ args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;
ac_build_image_opcode(&ctx->ac, &args);
}
case nir_intrinsic_image_deref_atomic_inc_wrap: {
atomic_name = "inc";
atomic_subop = ac_atomic_inc_wrap;
- /* ATOMIC_INC instruction does:
- * value = (value + 1) % (data + 1)
- * but we want:
- * value = (value + 1) % data
- * So replace 'data' by 'data - 1'.
- */
- ctx->ssa_defs[instr->src[3].ssa->index] =
- LLVMBuildSub(ctx->ac.builder,
- ctx->ssa_defs[instr->src[3].ssa->index],
- ctx->ac.i32_1, "");
break;
}
case nir_intrinsic_bindless_image_atomic_dec_wrap:
args.dmask = 0xf;
args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
args.opcode = ac_image_get_resinfo;
+ assert(nir_src_as_uint(instr->src[1]) == 0);
args.lod = ctx->ac.i32_0;
args.attributes = AC_FUNC_ATTR_READNONE;
case nir_intrinsic_deref_atomic_exchange:
op = LLVMAtomicRMWBinOpXchg;
break;
+#if LLVM_VERSION_MAJOR >= 10
+ case nir_intrinsic_shared_atomic_fadd:
+ case nir_intrinsic_deref_atomic_fadd:
+ op = LLVMAtomicRMWBinOpFAdd;
+ break;
+#endif
default:
return NULL;
}
- result = ac_build_atomic_rmw(&ctx->ac, op, ptr, ac_to_integer(&ctx->ac, src), sync_scope);
+ LLVMValueRef val;
+
+ if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd ||
+ instr->intrinsic == nir_intrinsic_deref_atomic_fadd) {
+ val = ac_to_float(&ctx->ac, src);
+ } else {
+ val = ac_to_integer(&ctx->ac, src);
+ }
+
+ result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope);
}
if (ctx->ac.postponed_kill)
unsigned bitsize)
{
LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
+ LLVMValueRef interp_param_f;
- interp_param = LLVMBuildBitCast(ctx->ac.builder,
+ interp_param_f = LLVMBuildBitCast(ctx->ac.builder,
interp_param, ctx->ac.v2f32, "");
LLVMValueRef i = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
+ ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");
LLVMValueRef j = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
+ ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");
+
+ /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */
+ if (ctx->verified_interp &&
+ !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {
+ LLVMValueRef args[2];
+ args[0] = i;
+ args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false);
+ LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
+ args, 2, AC_FUNC_ATTR_READNONE);
+ ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));
+ _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);
+ }
LLVMValueRef values[4];
assert(bitsize == 16 || bitsize == 32);
return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
}
-static LLVMValueRef load_input(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
+static LLVMValueRef visit_load(struct ac_nir_context *ctx,
+ nir_intrinsic_instr *instr, bool is_output)
{
- unsigned offset_idx = instr->intrinsic == nir_intrinsic_load_input ? 0 : 1;
+ LLVMValueRef values[8];
+ LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
+ LLVMTypeRef component_type;
+ unsigned base = nir_intrinsic_base(instr);
+ unsigned component = nir_intrinsic_component(instr);
+ unsigned count = instr->dest.ssa.num_components *
+ (instr->dest.ssa.bit_size == 64 ? 2 : 1);
+ nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
+ LLVMValueRef vertex_index =
+ vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
+ nir_src offset = *nir_get_io_offset_src(instr);
+ LLVMValueRef indir_index = NULL;
- /* We only lower inputs for fragment shaders ATM */
- ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[offset_idx]);
- assert(offset);
- assert(offset[0].i32 == 0);
+ if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
+ component_type = LLVMGetElementType(dest_type);
+ else
+ component_type = dest_type;
- unsigned component = nir_intrinsic_component(instr);
- unsigned index = nir_intrinsic_base(instr);
+ if (nir_src_is_const(offset))
+ assert(nir_src_as_uint(offset) == 0);
+ else
+ indir_index = get_src(ctx, offset);
+
+ if (ctx->stage == MESA_SHADER_TESS_CTRL ||
+ (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
+ LLVMValueRef result =
+ ctx->abi->load_tess_varyings(ctx->abi, component_type,
+ vertex_index, indir_index,
+ 0, 0, base * 4,
+ component,
+ instr->num_components,
+ false, false, !is_output);
+ if (instr->dest.ssa.bit_size == 16) {
+ result = ac_to_integer(&ctx->ac, result);
+ result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
+ }
+ return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
+ }
+
+ /* No indirect indexing is allowed after this point. */
+ assert(!indir_index);
+
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ assert(nir_src_is_const(*vertex_index_src));
+
+ return ctx->abi->load_inputs(ctx->abi, 0, base * 4, component,
+ instr->num_components,
+ nir_src_as_uint(*vertex_index_src),
+ 0, type);
+ }
+
+ if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
+ nir_intrinsic_io_semantics(instr).fb_fetch_output)
+ return ctx->abi->emit_fbfetch(ctx->abi);
+
+ /* Other non-fragment cases have inputs and outputs in temporaries. */
+ if (ctx->stage != MESA_SHADER_FRAGMENT) {
+ for (unsigned chan = component; chan < count + component; chan++) {
+ if (is_output) {
+ values[chan] = LLVMBuildLoad(ctx->ac.builder,
+ ctx->abi->outputs[base * 4 + chan], "");
+ } else {
+ values[chan] = ctx->abi->inputs[base * 4 + chan];
+ if (!values[chan])
+ values[chan] = LLVMGetUndef(ctx->ac.i32);
+ }
+ }
+ LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component);
+ return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
+ }
+
+ /* Fragment shader inputs. */
unsigned vertex_id = 2; /* P0 */
if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
}
}
- LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
- LLVMValueRef values[8];
-
- /* Each component of a 64-bit value takes up two GL-level channels. */
- unsigned num_components = instr->dest.ssa.num_components;
- unsigned bit_size = instr->dest.ssa.bit_size;
- unsigned channels =
- bit_size == 64 ? num_components * 2 : num_components;
+ LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
- for (unsigned chan = 0; chan < channels; chan++) {
+ for (unsigned chan = 0; chan < count; chan++) {
if (component + chan > 4)
- attr_number = LLVMConstInt(ctx->ac.i32, index + 1, false);
+ attr_number = LLVMConstInt(ctx->ac.i32, base + 1, false);
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
values[chan] = ac_build_fs_interp_mov(&ctx->ac,
LLVMConstInt(ctx->ac.i32, vertex_id, false),
ac_get_arg(&ctx->ac, ctx->args->prim_mask));
values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
values[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],
- bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, "");
+ instr->dest.ssa.bit_size == 16 ? ctx->ac.i16
+ : ctx->ac.i32, "");
}
- LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, channels);
- if (bit_size == 64) {
- LLVMTypeRef type = num_components == 1 ? ctx->ac.i64 :
- LLVMVectorType(ctx->ac.i64, num_components);
- result = LLVMBuildBitCast(ctx->ac.builder, result, type, "");
- }
- return result;
+ LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, count);
+ return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
}
static void visit_intrinsic(struct ac_nir_context *ctx,
case nir_intrinsic_store_deref:
visit_store_var(ctx, instr);
break;
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_input_vertex:
+ case nir_intrinsic_load_per_vertex_input:
+ result = visit_load(ctx, instr, false);
+ break;
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_per_vertex_output:
+ result = visit_load(ctx, instr, true);
+ break;
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_per_vertex_output:
+ visit_store_output(ctx, instr);
+ break;
case nir_intrinsic_load_shared:
result = visit_load_shared(ctx, instr);
break;
result = visit_image_size(ctx, instr, false);
break;
case nir_intrinsic_shader_clock:
- result = ac_build_shader_clock(&ctx->ac);
+ result = ac_build_shader_clock(&ctx->ac,
+ nir_intrinsic_memory_scope(instr));
break;
case nir_intrinsic_discard:
case nir_intrinsic_discard_if:
case nir_intrinsic_memory_barrier_shared:
emit_membar(&ctx->ac, instr);
break;
+ case nir_intrinsic_scoped_barrier: {
+ assert(!(nir_intrinsic_memory_semantics(instr) &
+ (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
+
+ nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
+
+ unsigned wait_flags = 0;
+ if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
+ wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
+ if (modes & nir_var_mem_shared)
+ wait_flags |= AC_WAIT_LGKM;
+
+ if (wait_flags)
+ ac_build_waitcnt(&ctx->ac, wait_flags);
+
+ if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
+ ac_emit_barrier(&ctx->ac, ctx->stage);
+ break;
+ }
case nir_intrinsic_memory_barrier_tcs_patch:
break;
case nir_intrinsic_control_barrier:
case nir_intrinsic_shared_atomic_or:
case nir_intrinsic_shared_atomic_xor:
case nir_intrinsic_shared_atomic_exchange:
- case nir_intrinsic_shared_atomic_comp_swap: {
+ case nir_intrinsic_shared_atomic_comp_swap:
+ case nir_intrinsic_shared_atomic_fadd: {
LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
instr->src[1].ssa->bit_size);
result = visit_var_atomic(ctx, instr, ptr, 1);
case nir_intrinsic_deref_atomic_or:
case nir_intrinsic_deref_atomic_xor:
case nir_intrinsic_deref_atomic_exchange:
- case nir_intrinsic_deref_atomic_comp_swap: {
+ case nir_intrinsic_deref_atomic_comp_swap:
+ case nir_intrinsic_deref_atomic_fadd: {
LLVMValueRef ptr = get_src(ctx, instr->src[0]);
result = visit_var_atomic(ctx, instr, ptr, 1);
break;
instr->dest.ssa.bit_size);
break;
}
- case nir_intrinsic_load_input:
- case nir_intrinsic_load_input_vertex:
- result = load_input(ctx, instr);
- break;
case nir_intrinsic_emit_vertex:
ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
break;
case nir_intrinsic_shuffle:
if (ctx->ac.chip_class == GFX8 ||
ctx->ac.chip_class == GFX9 ||
- (ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) {
+ (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
get_src(ctx, instr->src[1]));
} else {
offset_src = i;
break;
case nir_tex_src_bias:
- if (instr->op == nir_texop_txb)
- args.bias = get_src(ctx, instr->src[i].src);
+ args.bias = get_src(ctx, instr->src[i].src);
break;
case nir_tex_src_lod: {
if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
case nir_tex_src_ddy:
ddy = get_src(ctx, instr->src[i].src);
break;
+ case nir_tex_src_min_lod:
+ args.min_lod = get_src(ctx, instr->src[i].src);
+ break;
case nir_tex_src_texture_offset:
case nir_tex_src_sampler_offset:
case nir_tex_src_plane:
}
}
+ assert(instr->dest.is_ssa);
+ args.d16 = instr->dest.ssa.bit_size == 16;
+
result = build_tex_intrinsic(ctx, instr, &args);
if (instr->op == nir_texop_query_levels)
}
+static bool is_def_used_in_an_export(const nir_ssa_def* def) { /* Report whether `def` reaches a store_deref.
+ *
+ * Walks the uses of `def` and returns true when one of them is a
+ * store_deref intrinsic, or a vec4 ALU instruction whose own result
+ * satisfies this same test (checked recursively). Returns false when
+ * no use matches. Uses in any other instruction are ignored.
+ */
+ nir_foreach_use(use_src, def) {
+ if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
+ if (instr->intrinsic == nir_intrinsic_store_deref)
+ return true;
+ } else if (use_src->parent_instr->type == nir_instr_type_alu) {
+ nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
+ if (instr->op == nir_op_vec4 && /* only vec4 is followed; other ALU ops end the search */
+ is_def_used_in_an_export(&instr->dest.dest.ssa)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
static void visit_ssa_undef(struct ac_nir_context *ctx,
const nir_ssa_undef_instr *instr)
{
unsigned num_components = instr->def.num_components;
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
- LLVMValueRef undef;
- if (num_components == 1)
- undef = LLVMGetUndef(type);
- else {
- undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) { /* keep a real undef when zero-conversion is off or the value reaches an export */
+ LLVMValueRef undef;
+
+ if (num_components == 1)
+ undef = LLVMGetUndef(type);
+ else {
+ undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ }
+ ctx->ssa_defs[instr->def.index] = undef;
+ } else { /* otherwise record an integer zero of the def's bit size instead of undef */
+ LLVMValueRef zero = LLVMConstInt(type, 0, false);
+ if (num_components > 1) {
+ zero = ac_build_gather_values_extended(
+ &ctx->ac, &zero, num_components, 0, false, false); /* request num_components elements (was a fixed 4) so the width matches the undef branch */
+ }
+ ctx->ssa_defs[instr->def.index] = zero;
}
- ctx->ssa_defs[instr->def.index] = undef;
}
static void visit_jump(struct ac_llvm_context *ctx,
break;
case nir_deref_type_ptr_as_array:
if (instr->mode == nir_var_mem_global) {
- unsigned stride = nir_deref_instr_ptr_as_array_stride(instr);
+ unsigned stride = nir_deref_instr_array_stride(instr);
LLVMValueRef index = get_src(ctx, instr->arr.index);
if (LLVMTypeOf(index) != ctx->ac.i64)
LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);
if (LLVMTypeOf(result) != type) {
- if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMFixedVectorTypeKind) {
+ if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
result = LLVMBuildBitCast(ctx->ac.builder, result,
type, "");
} else {
{
int i, j;
ctx->num_locals = 0;
- nir_foreach_variable(variable, &func->impl->locals) {
+ nir_foreach_function_temp_variable(variable, func->impl) {
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
variable->data.driver_location = ctx->num_locals * 4;
variable->data.location_frac = 0;
ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
- nir_foreach_variable(variable, &nir->outputs)
- ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
- ctx.stage);
+ /* TODO: remove this after RADV switches to lowered IO */
+ if (!nir->info.io_lowered) {
+ nir_foreach_shader_out_variable(variable, nir) {
+ ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
+ ctx.stage);
+ }
+ }
ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ctx.verified_interp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
func = (struct nir_function *)exec_list_get_head(&nir->functions);
nir_index_ssa_defs(func->impl);
ralloc_free(ctx.defs);
ralloc_free(ctx.phis);
ralloc_free(ctx.vars);
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ralloc_free(ctx.verified_interp);
}
bool
*/
indirect_mask |= nir_var_function_temp;
- progress |= nir_lower_indirect_derefs(nir, indirect_mask);
+ progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
return progress;
}
static unsigned
get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
{
- if (intrin->intrinsic != nir_intrinsic_store_deref)
+ if (intrin->intrinsic != nir_intrinsic_store_output) /* only store_output intrinsics contribute to the mask */
return 0;
- nir_variable *var =
- nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0]));
-
- if (var->data.mode != nir_var_shader_out)
- return 0;
+ unsigned writemask = nir_intrinsic_write_mask(intrin) << /* the intrinsic's write mask, shifted left by its component index */
+ nir_intrinsic_component(intrin);
+ unsigned location = nir_intrinsic_io_semantics(intrin).location;
- unsigned writemask = 0;
- const int location = var->data.location;
- unsigned first_component = var->data.location_frac;
- unsigned num_comps = intrin->dest.ssa.num_components;
+ if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
+ return writemask << 4; /* outer-level writes are reported 4 bits above inner-level writes */
+ else if (location == VARYING_SLOT_TESS_LEVEL_INNER)
+ return writemask; /* inner-level writes are returned unshifted */
- if (location == VARYING_SLOT_TESS_LEVEL_INNER)
- writemask = ((1 << (num_comps + 1)) - 1) << first_component;
- else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
- writemask = (((1 << (num_comps + 1)) - 1) << first_component) << 4;
-
- return writemask;
+ return 0; /* stores to any other location do not touch tess factors */
}
static void