struct hash_table *defs;
struct hash_table *phis;
struct hash_table *vars;
+ struct hash_table *verified_interp;
LLVMValueRef main_function;
LLVMBasicBlockRef continue_block;
LLVMTypeRef result_type,
LLVMValueRef src0)
{
- char name[64];
+ char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
};
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
- ac_get_elem_bits(ctx, result_type));
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
}
LLVMTypeRef result_type,
LLVMValueRef src0, LLVMValueRef src1)
{
- char name[64];
+ char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
ac_to_float(ctx, src1),
};
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
- ac_get_elem_bits(ctx, result_type));
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
}
LLVMTypeRef result_type,
LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
- char name[64];
+ char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
ac_to_float(ctx, src1),
ac_to_float(ctx, src2),
};
- ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
- ac_get_elem_bits(ctx, result_type));
+ ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
+ ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
}
unsigned num_components = instr->dest.dest.ssa.num_components;
unsigned src_components;
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
+ bool saved_inexact = false;
+
+ if (instr->exact)
+ saved_inexact = ac_disable_inexact_math(ctx->ac.builder);
assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
switch (instr->op) {
result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
case nir_op_f2f16_rtz:
+ case nir_op_f2f16:
+ case nir_op_f2fmp:
src[0] = ac_to_float(&ctx->ac, src[0]);
- if (LLVMTypeOf(src[0]) == ctx->ac.f64)
- src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
- LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
- result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
- result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+
+ /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
+ * all f32->f16 conversions have to round towards zero, because both scalar
+ * and vec2 down-conversions have to round equally.
+ */
+ if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL ||
+ instr->op == nir_op_f2f16_rtz) {
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+
+ if (LLVMTypeOf(src[0]) == ctx->ac.f64)
+ src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
+
+ /* Fast path conversion. This only works if NIR is vectorized
+ * to vec2 16.
+ */
+ if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
+ LLVMValueRef args[] = {
+ ac_llvm_extract_elem(&ctx->ac, src[0], 0),
+ ac_llvm_extract_elem(&ctx->ac, src[0], 1),
+ };
+ result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
+ break;
+ }
+
+ assert(ac_get_llvm_num_components(src[0]) == 1);
+ LLVMValueRef param[2] = { src[0], LLVMGetUndef(ctx->ac.f32) };
+ result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+ } else {
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
+ result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ else
+ result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ }
break;
case nir_op_f2f16_rtne:
- case nir_op_f2f16:
case nir_op_f2f32:
case nir_op_f2f64:
src[0] = ac_to_float(&ctx->ac, src[0]);
break;
case nir_op_u2u8:
case nir_op_u2u16:
+ case nir_op_u2ump:
case nir_op_u2u32:
case nir_op_u2u64:
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
break;
case nir_op_i2i8:
case nir_op_i2i16:
+ case nir_op_i2imp:
case nir_op_i2i32:
case nir_op_i2i64:
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
result = ac_to_integer_or_pointer(&ctx->ac, result);
ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
}
+
+ if (instr->exact)
+ ac_restore_inexact_math(ctx->ac.builder, saved_inexact);
}
static void visit_load_const(struct ac_nir_context *ctx,
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+ assert(instr->dest.is_ssa);
return ac_build_buffer_load_format(&ctx->ac,
args->resource,
args->coords[0],
ctx->ac.i32_0,
util_last_bit(mask),
- 0, true);
+ 0, true,
+ instr->dest.ssa.bit_size == 16);
}
args->opcode = ac_image_sample;
break;
case nir_texop_tg4:
args->opcode = ac_image_gather4;
- args->level_zero = true;
+ if (!args->lod && !args->bias)
+ args->level_zero = true;
break;
case nir_texop_lod:
args->opcode = ac_image_get_lod;
if (instr->dest.ssa.bit_size == 8) {
unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
- LLVMTypeRef vec_type = LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords);
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
LLVMValueRef params[3];
if (load_dwords > 1) {
- LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), "");
+ LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
} else {
res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
if (instr->dest.ssa.num_components > 1)
- res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), instr->dest.ssa.num_components), "");
+ res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
return res;
} else if (instr->dest.ssa.bit_size == 16) {
unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
- LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
+ LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
}
if (access & ACCESS_STREAM_CACHE_POLICY)
- cache_policy |= ac_slc;
+ cache_policy |= ac_slc | ac_glc;
return cache_policy;
}
vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
ctx->ac.i32_0, "");
+ assert(instr->dest.is_ssa);
bool can_speculate = access & ACCESS_CAN_REORDER;
res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
ctx->ac.i32_0, num_channels,
args.cache_policy,
- can_speculate);
+ can_speculate,
+ instr->dest.ssa.bit_size == 16);
res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
args.dmask = 15;
args.attributes = AC_FUNC_ATTR_READONLY;
+ assert(instr->dest.is_ssa);
+ args.d16 = instr->dest.ssa.bit_size == 16;
+
res = ac_build_image_opcode(&ctx->ac, &args);
}
return exit_waterfall(ctx, &wctx, res);
ctx->ac.i32_0, "");
ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
- ctx->ac.i32_0, src_channels,
- args.cache_policy);
+ ctx->ac.i32_0, args.cache_policy);
} else {
bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
if (!level_zero)
args.lod = get_src(ctx, instr->src[4]);
args.dmask = 15;
+ args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;
ac_build_image_opcode(&ctx->ac, &args);
}
unsigned bitsize)
{
LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
+ LLVMValueRef interp_param_f;
- interp_param = LLVMBuildBitCast(ctx->ac.builder,
+ interp_param_f = LLVMBuildBitCast(ctx->ac.builder,
interp_param, ctx->ac.v2f32, "");
LLVMValueRef i = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
+ ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");
LLVMValueRef j = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
+ ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");
+
+ /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */
+ if (ctx->verified_interp &&
+ !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {
+ LLVMValueRef args[2];
+ args[0] = i;
+ args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false);
+ LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
+ args, 2, AC_FUNC_ATTR_READNONE);
+ ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));
+ _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);
+ }
LLVMValueRef values[4];
assert(bitsize == 16 || bitsize == 32);
result = visit_image_size(ctx, instr, false);
break;
case nir_intrinsic_shader_clock:
- result = ac_build_shader_clock(&ctx->ac);
+ result = ac_build_shader_clock(&ctx->ac,
+ nir_intrinsic_memory_scope(instr));
break;
case nir_intrinsic_discard:
case nir_intrinsic_discard_if:
offset_src = i;
break;
case nir_tex_src_bias:
- if (instr->op == nir_texop_txb)
- args.bias = get_src(ctx, instr->src[i].src);
+ args.bias = get_src(ctx, instr->src[i].src);
break;
case nir_tex_src_lod: {
if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
case nir_tex_src_ddy:
ddy = get_src(ctx, instr->src[i].src);
break;
+ case nir_tex_src_min_lod:
+ args.min_lod = get_src(ctx, instr->src[i].src);
+ break;
case nir_tex_src_texture_offset:
case nir_tex_src_sampler_offset:
case nir_tex_src_plane:
if (instr->op == nir_texop_texture_samples) {
LLVMValueRef res, samples, is_msaa;
+ LLVMValueRef default_sample;
+
res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
samples = LLVMBuildExtractElement(ctx->ac.builder, res,
LLVMConstInt(ctx->ac.i32, 3, false), "");
LLVMConstInt(ctx->ac.i32, 0xf, false), "");
samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
samples, "");
+
+ if (ctx->abi->robust_buffer_access) {
+ LLVMValueRef dword1, is_null_descriptor;
+
+ /* Extract the second dword of the descriptor, if it's
+ * all zero, then it's a null descriptor.
+ */
+ dword1 = LLVMBuildExtractElement(ctx->ac.builder, res,
+ LLVMConstInt(ctx->ac.i32, 1, false), "");
+ is_null_descriptor =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,
+ LLVMConstInt(ctx->ac.i32, 0, false), "");
+ default_sample =
+ LLVMBuildSelect(ctx->ac.builder, is_null_descriptor,
+ ctx->ac.i32_0, ctx->ac.i32_1, "");
+ } else {
+ default_sample = ctx->ac.i32_1;
+ }
+
samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
- ctx->ac.i32_1, "");
+ default_sample, "");
result = samples;
goto write_result;
}
}
}
+ assert(instr->dest.is_ssa);
+ args.d16 = instr->dest.ssa.bit_size == 16;
+
result = build_tex_intrinsic(ctx, instr, &args);
if (instr->op == nir_texop_query_levels)
}
+static bool is_def_used_in_an_export(const nir_ssa_def* def) {
+ nir_foreach_use(use_src, def) {
+ if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
+ if (instr->intrinsic == nir_intrinsic_store_deref)
+ return true;
+ } else if (use_src->parent_instr->type == nir_instr_type_alu) {
+ nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
+ if (instr->op == nir_op_vec4 &&
+ is_def_used_in_an_export(&instr->dest.dest.ssa)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
static void visit_ssa_undef(struct ac_nir_context *ctx,
const nir_ssa_undef_instr *instr)
{
unsigned num_components = instr->def.num_components;
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
- LLVMValueRef undef;
- if (num_components == 1)
- undef = LLVMGetUndef(type);
- else {
- undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {
+ LLVMValueRef undef;
+
+ if (num_components == 1)
+ undef = LLVMGetUndef(type);
+ else {
+ undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ }
+ ctx->ssa_defs[instr->def.index] = undef;
+ } else {
+ LLVMValueRef zero = LLVMConstInt(type, 0, false);
+ if (num_components > 1) {
+ zero = ac_build_gather_values_extended(
+ &ctx->ac, &zero, 4, 0, false, false);
+ }
+ ctx->ssa_defs[instr->def.index] = zero;
}
- ctx->ssa_defs[instr->def.index] = undef;
}
static void visit_jump(struct ac_llvm_context *ctx,
ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ctx.verified_interp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
func = (struct nir_function *)exec_list_get_head(&nir->functions);
nir_index_ssa_defs(func->impl);
ralloc_free(ctx.defs);
ralloc_free(ctx.phis);
ralloc_free(ctx.vars);
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ralloc_free(ctx.verified_interp);
}
bool