struct hash_table *defs;
struct hash_table *phis;
struct hash_table *vars;
+ struct hash_table *verified_interp;
LLVMValueRef main_function;
LLVMBasicBlockRef continue_block;
LLVMTypeRef src1_type = LLVMTypeOf(src1);
LLVMTypeRef src2_type = LLVMTypeOf(src2);
- assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);
+ assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMFixedVectorTypeKind);
if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
unsigned num_components = instr->dest.dest.ssa.num_components;
unsigned src_components;
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
+ bool saved_inexact = false;
+
+ if (instr->exact)
+ saved_inexact = ac_disable_inexact_math(ctx->ac.builder);
assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
switch (instr->op) {
ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_frsq:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result);
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_frexp_exp:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = ac_to_integer_or_pointer(&ctx->ac, result);
ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
}
+
+ if (instr->exact)
+ ac_restore_inexact_math(ctx->ac.builder, saved_inexact);
}
static void visit_load_const(struct ac_nir_context *ctx,
count = 1;
num_bytes = 2;
}
+
+ /* Due to alignment issues, split stores of 8-bit/16-bit
+ * vectors.
+ */
+ if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
+ writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
+ count = 1;
+ num_bytes = elem_size_bytes;
+ }
+
data = extract_vector_range(&ctx->ac, base_data, start, count);
offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
LLVMTypeRef src_component_type;
- if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
+ if (LLVMGetTypeKind(dest_type) == LLVMFixedVectorTypeKind)
src_component_type = LLVMGetElementType(dest_type);
else
src_component_type = dest_type;
break;
case nir_var_mem_global: {
LLVMValueRef address = get_src(ctx, instr->src[0]);
+ LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
unsigned natural_stride = type_scalar_size_bytes(deref->type);
unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, result_type) / 8;
+ bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
- LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
- if (stride != natural_stride) {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(result_type),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ if (stride != natural_stride || split_loads) {
+ if (LLVMGetTypeKind(result_type) == LLVMFixedVectorTypeKind)
+ result_type = LLVMGetElementType(result_type);
+
+ LLVMTypeRef ptr_type = LLVMPointerType(result_type,
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
unsigned natural_stride = type_scalar_size_bytes(deref->type);
unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(val)) / 8;
+ bool split_stores = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 &&
- stride == natural_stride) {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ stride == natural_stride && !split_stores) {
+ LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
val = LLVMBuildBitCast(ctx->ac.builder, val,
LLVMGetElementType(LLVMTypeOf(address)), "");
LLVMBuildStore(ctx->ac.builder, val, address);
} else {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(LLVMTypeOf(val)),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ LLVMTypeRef val_type = LLVMTypeOf(val);
+ if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMFixedVectorTypeKind)
+ val_type = LLVMGetElementType(val_type);
+
+ LLVMTypeRef ptr_type = LLVMPointerType(val_type,
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
for (unsigned chan = 0; chan < 4; chan++) {
if (!(writemask & (1 << chan)))
unsigned bitsize)
{
LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
+ LLVMValueRef interp_param_f;
- interp_param = LLVMBuildBitCast(ctx->ac.builder,
+ interp_param_f = LLVMBuildBitCast(ctx->ac.builder,
interp_param, ctx->ac.v2f32, "");
LLVMValueRef i = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
+ ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");
LLVMValueRef j = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
+ ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");
+
+ /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */
+ if (ctx->verified_interp &&
+ !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {
+ LLVMValueRef args[2];
+ args[0] = i;
+ args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false);
+ LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
+ args, 2, AC_FUNC_ATTR_READNONE);
+ ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));
+ _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);
+ }
LLVMValueRef values[4];
assert(bitsize == 16 || bitsize == 32);
case nir_intrinsic_emit_vertex:
ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
break;
+ case nir_intrinsic_emit_vertex_with_counter: {
+ unsigned stream = nir_intrinsic_stream_id(instr);
+ LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
+ ctx->abi->emit_vertex_with_counter(ctx->abi, stream,
+ next_vertex,
+ ctx->abi->outputs);
+ break;
+ }
case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter:
ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
break;
case nir_intrinsic_load_tess_coord:
case nir_tex_src_ddy:
ddy = get_src(ctx, instr->src[i].src);
break;
+ case nir_tex_src_min_lod:
+ args.min_lod = get_src(ctx, instr->src[i].src);
+ break;
case nir_tex_src_texture_offset:
case nir_tex_src_sampler_offset:
case nir_tex_src_plane:
if (instr->op == nir_texop_texture_samples) {
LLVMValueRef res, samples, is_msaa;
+ LLVMValueRef default_sample;
+
res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
samples = LLVMBuildExtractElement(ctx->ac.builder, res,
LLVMConstInt(ctx->ac.i32, 3, false), "");
LLVMConstInt(ctx->ac.i32, 0xf, false), "");
samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
samples, "");
+
+ if (ctx->abi->robust_buffer_access) {
+ LLVMValueRef dword1, is_null_descriptor;
+
+ /* Extract the second dword of the descriptor. If it's
+ * all zero, then it's a null descriptor.
+ */
+ dword1 = LLVMBuildExtractElement(ctx->ac.builder, res,
+ LLVMConstInt(ctx->ac.i32, 1, false), "");
+ is_null_descriptor =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,
+ LLVMConstInt(ctx->ac.i32, 0, false), "");
+ default_sample =
+ LLVMBuildSelect(ctx->ac.builder, is_null_descriptor,
+ ctx->ac.i32_0, ctx->ac.i32_1, "");
+ } else {
+ default_sample = ctx->ac.i32_1;
+ }
+
samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
- ctx->ac.i32_1, "");
+ default_sample, "");
result = samples;
goto write_result;
}
}
+/**
+ * Walk the uses of an SSA def and report whether any of them feeds a
+ * store_deref intrinsic.
+ *
+ * A use counts when its parent instruction is a store_deref intrinsic, or
+ * when it is a vec4 ALU instruction whose own result satisfies this same
+ * test (checked recursively). Any other kind of user is ignored.
+ *
+ * NOTE(review): "export" in the name presumably refers to shader output
+ * stores; only store_deref is actually checked here.
+ *
+ * \param def  SSA definition whose uses are inspected.
+ * \return true if a matching use is found, false otherwise.
+ */
+static bool is_def_used_in_an_export(const nir_ssa_def* def) {
+	nir_foreach_use(src, def) {
+		nir_instr *user = src->parent_instr;
+
+		switch (user->type) {
+		case nir_instr_type_intrinsic:
+			/* Direct store of the value. */
+			if (nir_instr_as_intrinsic(user)->intrinsic == nir_intrinsic_store_deref)
+				return true;
+			break;
+		case nir_instr_type_alu: {
+			/* Look through vec4 packing to the packed value's own uses. */
+			nir_alu_instr *alu = nir_instr_as_alu(user);
+			if (alu->op == nir_op_vec4 &&
+			    is_def_used_in_an_export(&alu->dest.dest.ssa))
+				return true;
+			break;
+		}
+		default:
+			break;
+		}
+	}
+	return false;
+}
+
static void visit_ssa_undef(struct ac_nir_context *ctx,
const nir_ssa_undef_instr *instr)
{
unsigned num_components = instr->def.num_components;
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
- LLVMValueRef undef;
- if (num_components == 1)
- undef = LLVMGetUndef(type);
- else {
- undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {
+ LLVMValueRef undef;
+
+ if (num_components == 1)
+ undef = LLVMGetUndef(type);
+ else {
+ undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ }
+ ctx->ssa_defs[instr->def.index] = undef;
+ } else {
+ LLVMValueRef zero = LLVMConstInt(type, 0, false);
+ if (num_components > 1) {
+ zero = ac_build_gather_values_extended(
+ &ctx->ac, &zero, 4, 0, false, false);
+ }
+ ctx->ssa_defs[instr->def.index] = zero;
}
- ctx->ssa_defs[instr->def.index] = undef;
}
static void visit_jump(struct ac_llvm_context *ctx,
LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);
if (LLVMTypeOf(result) != type) {
- if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
+ if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMFixedVectorTypeKind) {
result = LLVMBuildBitCast(ctx->ac.builder, result,
type, "");
} else {
ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ctx.verified_interp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
func = (struct nir_function *)exec_list_get_head(&nir->functions);
nir_index_ssa_defs(func->impl);
ralloc_free(ctx.defs);
ralloc_free(ctx.phis);
ralloc_free(ctx.vars);
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ralloc_free(ctx.verified_interp);
}
bool