X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fllvm%2Fac_nir_to_llvm.c;h=6f0d253d9c7a38c54a51ae37b4013546b5eb45fb;hb=0d63a1a84d409d08fd6c6f8a0c569b2620d6a600;hp=49627990163e27ca4daeaa468b3d439fbe5dedf0;hpb=8e4e2cedcf53d0f9649d51fc3acccaada96172bb;p=mesa.git

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 49627990163..6f0d253d9c7 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -51,6 +51,7 @@ struct ac_nir_context {
 struct hash_table *defs;
 struct hash_table *phis;
 struct hash_table *vars;
+ struct hash_table *verified_interp;
 
 LLVMValueRef main_function;
 LLVMBasicBlockRef continue_block;
@@ -245,7 +246,7 @@ static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
 LLVMTypeRef src1_type = LLVMTypeOf(src1);
 LLVMTypeRef src2_type = LLVMTypeOf(src2);
 
 assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);
 
 if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
 LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
@@ -589,6 +590,10 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 unsigned num_components = instr->dest.dest.ssa.num_components;
 unsigned src_components;
 LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
+ bool saved_inexact = false;
+
+ if (instr->exact)
+ saved_inexact = ac_disable_inexact_math(ctx->ac.builder);
 
 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
 switch (instr->op) {
@@ -688,8 +693,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
 break;
 case nir_op_frcp:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(src[0]), 1.0), src[0]);
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
 break;
 case nir_op_iand:
 result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
 break;
@@ -834,9 +839,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 ac_to_float_type(&ctx->ac, def_type), src[0]);
 break;
 case nir_op_frsq:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result);
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
 break;
 case nir_op_frexp_exp:
 src[0] = ac_to_float(&ctx->ac, src[0]);
@@ -1183,6 +1187,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 result = ac_to_integer_or_pointer(&ctx->ac, result);
 ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
 }
+
+ if (instr->exact)
+ ac_restore_inexact_math(ctx->ac.builder, saved_inexact);
 }
 
 static void visit_load_const(struct ac_nir_context *ctx,
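The visit_alu() hunks above bracket a single ALU instruction with ac_disable_inexact_math()/ac_restore_inexact_math() so that NIR's `exact` flag temporarily takes the LLVM builder out of fast-math mode and puts it back afterwards. The following is a minimal, self-contained sketch of that save/modify/restore pattern; the `fake_builder` struct and its helpers are hypothetical stand-ins used only for illustration, not the real ac_llvm_build API.

#include <stdbool.h>
#include <stdio.h>

struct fake_builder { bool allow_inexact_math; };

/* Turn fast math off and report what it was, mirroring the role of
 * ac_disable_inexact_math() in the hunk above. */
static bool disable_inexact_math(struct fake_builder *b)
{
   bool saved = b->allow_inexact_math;
   b->allow_inexact_math = false;
   return saved;
}

/* Put the previous setting back, mirroring ac_restore_inexact_math(). */
static void restore_inexact_math(struct fake_builder *b, bool saved)
{
   b->allow_inexact_math = saved;
}

static void emit_alu(struct fake_builder *b, bool exact)
{
   bool saved_inexact = false;
   if (exact)
      saved_inexact = disable_inexact_math(b);

   printf("emitting op with fast math %s\n",
          b->allow_inexact_math ? "enabled" : "disabled");

   if (exact)
      restore_inexact_math(b, saved_inexact);
}

int main(void)
{
   struct fake_builder b = { .allow_inexact_math = true };
   emit_alu(&b, false); /* ordinary op: fast math stays on */
   emit_alu(&b, true);  /* exact op: fast math off for this op only */
   emit_alu(&b, false); /* and back to the saved state afterwards */
   return 0;
}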
@@ -1741,6 +1748,16 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 count = 1;
 num_bytes = 2;
 }
+
+ /* Due to alignment issues, split stores of 8-bit/16-bit
+ * vectors.
+ */
+ if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
+ writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
+ count = 1;
+ num_bytes = elem_size_bytes;
+ }
+
 data = extract_vector_range(&ctx->ac, base_data, start, count);
 
 offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
@@ -2177,7 +2194,7 @@ static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
 LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
 LLVMTypeRef src_component_type;
 if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
 src_component_type = LLVMGetElementType(dest_type);
 else
 src_component_type = dest_type;
@@ -2329,14 +2346,19 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 break;
 case nir_var_mem_global: {
 LLVMValueRef address = get_src(ctx, instr->src[0]);
+ LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
 unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
 unsigned natural_stride = type_scalar_size_bytes(deref->type);
 unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, result_type) / 8;
+ bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
 
- LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
- if (stride != natural_stride) {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(result_type),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ if (stride != natural_stride || split_loads) {
+ if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
+ result_type = LLVMGetElementType(result_type);
+
+ LLVMTypeRef ptr_type = LLVMPointerType(result_type,
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
 address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
 
 for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
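The GFX6 paths added above split sub-dword vector stores into single components: `count` is forced to 1 and the bits for the components that were not stored are ORed back into `writemask`, so the surrounding scan loop picks them up one by one. Below is a rough, self-contained model of that loop; scan_consecutive_range() is a simplified stand-in for mesa's u_bit_scan_consecutive_range() and the byte sizes are illustrative only.

#include <stdbool.h>
#include <stdio.h>

/* Pop the lowest run of consecutive set bits out of *mask (simplified
 * stand-in for u_bit_scan_consecutive_range()). */
static void scan_consecutive_range(unsigned *mask, int *start, int *count)
{
   *start = __builtin_ffs(*mask) - 1;
   *count = 0;
   while (*mask & (1u << (*start + *count)))
      (*count)++;
   *mask &= ~(((1u << *count) - 1u) << *start);
}

static void emit_stores(unsigned writemask, int elem_size_bytes, bool gfx6)
{
   while (writemask) {
      int start, count;
      scan_consecutive_range(&writemask, &start, &count);

      /* GFX6 can't do the wide sub-dword store: keep only the first
       * component and push the rest back into the writemask so the
       * loop emits them individually (same bit trick as the hunk). */
      if (gfx6 && count > 1 && elem_size_bytes < 4) {
         writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
         count = 1;
      }
      printf("  store %d component(s) at component %d (%d bytes)\n",
             count, start, count * elem_size_bytes);
   }
}

int main(void)
{
   printf("GFX7+:\n"); emit_stores(0xf, 2, false); /* one 8-byte store   */
   printf("GFX6:\n");  emit_stores(0xf, 2, true);  /* four 2-byte stores */
   return 0;
}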
@@ -2490,23 +2512,29 @@ visit_store_var(struct ac_nir_context *ctx,
 unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
 unsigned natural_stride = type_scalar_size_bytes(deref->type);
 unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(val)) / 8;
+ bool split_stores = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
 
 LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
 LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
 address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
 
 if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 &&
- stride == natural_stride) {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ stride == natural_stride && !split_stores) {
+ LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
 address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
 
 val = LLVMBuildBitCast(ctx->ac.builder, val,
 LLVMGetElementType(LLVMTypeOf(address)), "");
 LLVMBuildStore(ctx->ac.builder, val, address);
 } else {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(LLVMTypeOf(val)),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ LLVMTypeRef val_type = LLVMTypeOf(val);
+ if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
+ val_type = LLVMGetElementType(val_type);
+
+ LLVMTypeRef ptr_type = LLVMPointerType(val_type,
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
 address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
 for (unsigned chan = 0; chan < 4; chan++) {
 if (!(writemask & (1 << chan)))
@@ -3484,13 +3512,26 @@ static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx,
 unsigned bitsize)
 {
 LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
+ LLVMValueRef interp_param_f;
 
- interp_param = LLVMBuildBitCast(ctx->ac.builder,
+ interp_param_f = LLVMBuildBitCast(ctx->ac.builder,
 interp_param, ctx->ac.v2f32, "");
 LLVMValueRef i = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
+ ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");
 LLVMValueRef j = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
+ ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");
+
+ /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */
+ if (ctx->verified_interp &&
+ !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {
+ LLVMValueRef args[2];
+ args[0] = i;
+ args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false);
+ LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
+ args, 2, AC_FUNC_ATTR_READNONE);
+ ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));
+ _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);
+ }
 
 LLVMValueRef values[4];
 assert(bitsize == 16 || bitsize == 32);
@@ -3918,7 +3959,16 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 case nir_intrinsic_emit_vertex:
 ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
 break;
+ case nir_intrinsic_emit_vertex_with_counter: {
+ unsigned stream = nir_intrinsic_stream_id(instr);
+ LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
+ ctx->abi->emit_vertex_with_counter(ctx->abi, stream,
+ next_vertex,
+ ctx->abi->outputs);
+ break;
+ }
 case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter:
 ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
 break;
 case nir_intrinsic_load_tess_coord:
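In the load_interpolated_input() hunk above, llvm.amdgcn.class.f32 is called with the mask S_NAN | Q_NAN | N_INFINITY | P_INFINITY and the invocation is killed when the i coefficient falls into one of those classes. The plain C below is only a behavioral model of which lanes survive, with isnan()/isinf() standing in for the hardware class test; it is not the generated code.

#include <math.h>
#include <stdbool.h>
#include <stdio.h>

/* True when v is in one of the tested classes (NaN or +/-infinity),
 * i.e. what llvm.amdgcn.class.f32 returns for the mask used above. */
static bool is_nan_or_inf(float v)
{
   return isnan(v) || isinf(v);
}

/* Model of the emitted sequence: the thread survives only when the
 * interpolation coefficient is an ordered, finite number. */
static bool thread_survives(float i_coeff)
{
   bool bad_class = is_nan_or_inf(i_coeff);   /* llvm.amdgcn.class.f32 */
   bool keep = !bad_class;                    /* LLVMBuildNot           */
   return keep;                               /* ac_build_kill_if_false */
}

int main(void)
{
   const float coeffs[] = { 0.25f, -3.0f, INFINITY, -INFINITY, NAN };
   for (unsigned n = 0; n < sizeof(coeffs) / sizeof(coeffs[0]); n++)
      printf("i = %8g -> %s\n", coeffs[n],
             thread_survives(coeffs[n]) ? "keep" : "kill");
   return 0;
}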
@@ -3950,8 +4000,33 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 break;
 }
 case nir_intrinsic_shuffle:
- result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]));
+ if (ctx->ac.chip_class == GFX8 ||
+ ctx->ac.chip_class == GFX9 ||
+ (ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) {
+ result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
+ get_src(ctx, instr->src[1]));
+ } else {
+ LLVMValueRef src = get_src(ctx, instr->src[0]);
+ LLVMValueRef index = get_src(ctx, instr->src[1]);
+ LLVMTypeRef type = LLVMTypeOf(src);
+ struct waterfall_context wctx;
+ LLVMValueRef index_val;
+
+ index_val = enter_waterfall(ctx, &wctx, index, true);
+
+ src = LLVMBuildZExt(ctx->ac.builder, src,
+ ctx->ac.i32, "");
+
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
+ ctx->ac.i32,
+ (LLVMValueRef []) { src, index_val }, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
+
+ result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
+
+ result = exit_waterfall(ctx, &wctx, result);
+ }
 break;
 case nir_intrinsic_reduce:
 result = ac_build_reduce(&ctx->ac,
@@ -4393,6 +4468,9 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 case nir_tex_src_ddy:
 ddy = get_src(ctx, instr->src[i].src);
 break;
+ case nir_tex_src_min_lod:
+ args.min_lod = get_src(ctx, instr->src[i].src);
+ break;
 case nir_tex_src_texture_offset:
 case nir_tex_src_sampler_offset:
 case nir_tex_src_plane:
@@ -4408,6 +4486,8 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 
 if (instr->op == nir_texop_texture_samples) {
 LLVMValueRef res, samples, is_msaa;
+ LLVMValueRef default_sample;
+
 res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
 samples = LLVMBuildExtractElement(ctx->ac.builder, res,
 LLVMConstInt(ctx->ac.i32, 3, false), "");
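llvm.amdgcn.readlane needs a wave-uniform lane index, so on chips that cannot take the ac_build_shuffle() path the nir_intrinsic_shuffle fallback above is wrapped in a waterfall loop: pick the index of the first still-active lane, serve every lane that wants that same index with one readlane, deactivate them, and repeat until no lane is left. The scalar simulation below only illustrates that control flow; the lane arrays are a stand-in for a real wave, and enter_waterfall()/exit_waterfall() are mirrored conceptually rather than reproduced.

#include <stdbool.h>
#include <stdio.h>

#define LANES 8

int main(void)
{
   int src[LANES]   = { 10, 11, 12, 13, 14, 15, 16, 17 };
   int index[LANES] = {  3,  3,  0,  5,  3,  0,  7,  5 }; /* divergent indices */
   int result[LANES];
   bool active[LANES];
   for (int l = 0; l < LANES; l++)
      active[l] = true;

   int passes = 0;
   for (;;) {
      /* Pick the index of the first active lane: this value is uniform
       * for the duration of one loop iteration (enter_waterfall). */
      int uniform_index = -1;
      for (int l = 0; l < LANES; l++)
         if (active[l]) { uniform_index = index[l]; break; }
      if (uniform_index < 0)
         break; /* every lane served: exit_waterfall */

      /* All lanes asking for this index are served by one readlane. */
      for (int l = 0; l < LANES; l++)
         if (active[l] && index[l] == uniform_index) {
            result[l] = src[uniform_index]; /* llvm.amdgcn.readlane */
            active[l] = false;
         }
      passes++;
   }

   printf("served %d lanes in %d uniform passes:\n", LANES, passes);
   for (int l = 0; l < LANES; l++)
      printf("  lane %d: src[%d] = %d\n", l, index[l], result[l]);
   return 0;
}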
@@ -4424,8 +4504,27 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 LLVMConstInt(ctx->ac.i32, 0xf, false), "");
 samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
 samples, "");
+
+ if (ctx->abi->robust_buffer_access) {
+ LLVMValueRef dword1, is_null_descriptor;
+
+ /* Extract the second dword of the descriptor, if it's
+ * all zero, then it's a null descriptor.
+ */
+ dword1 = LLVMBuildExtractElement(ctx->ac.builder, res,
+ LLVMConstInt(ctx->ac.i32, 1, false), "");
+ is_null_descriptor =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,
+ LLVMConstInt(ctx->ac.i32, 0, false), "");
+ default_sample =
+ LLVMBuildSelect(ctx->ac.builder, is_null_descriptor,
+ ctx->ac.i32_0, ctx->ac.i32_1, "");
+ } else {
+ default_sample = ctx->ac.i32_1;
+ }
+
 samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
- ctx->ac.i32_1, "");
+ default_sample, "");
 result = samples;
 goto write_result;
 }
@@ -4703,19 +4802,46 @@ static void phi_post_pass(struct ac_nir_context *ctx)
 }
 
+static bool is_def_used_in_an_export(const nir_ssa_def* def) {
+ nir_foreach_use(use_src, def) {
+ if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
+ if (instr->intrinsic == nir_intrinsic_store_deref)
+ return true;
+ } else if (use_src->parent_instr->type == nir_instr_type_alu) {
+ nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
+ if (instr->op == nir_op_vec4 &&
+ is_def_used_in_an_export(&instr->dest.dest.ssa)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
 static void visit_ssa_undef(struct ac_nir_context *ctx,
 const nir_ssa_undef_instr *instr)
 {
 unsigned num_components = instr->def.num_components;
 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
- LLVMValueRef undef;
 
- if (num_components == 1)
- undef = LLVMGetUndef(type);
- else {
- undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {
+ LLVMValueRef undef;
+
+ if (num_components == 1)
+ undef = LLVMGetUndef(type);
+ else {
+ undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+ }
+ ctx->ssa_defs[instr->def.index] = undef;
+ } else {
+ LLVMValueRef zero = LLVMConstInt(type, 0, false);
+ if (num_components > 1) {
+ zero = ac_build_gather_values_extended(
+ &ctx->ac, &zero, 4, 0, false, false);
+ }
+ ctx->ssa_defs[instr->def.index] = zero;
 }
- ctx->ssa_defs[instr->def.index] = undef;
 }
 
 static void visit_jump(struct ac_llvm_context *ctx,
@@ -4896,7 +5022,7 @@ static void visit_deref(struct ac_nir_context *ctx,
 LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);
 
 if (LLVMTypeOf(result) != type) {
 if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
 result = LLVMBuildBitCast(ctx->ac.builder, result,
 type, "");
 } else {
@@ -5171,6 +5297,10 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
 ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
 _mesa_key_pointer_equal);
 
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ctx.verified_interp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
 func = (struct nir_function *)exec_list_get_head(&nir->functions);
 
 nir_index_ssa_defs(func->impl);
@@ -5205,6 +5335,8 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
 ralloc_free(ctx.defs);
 ralloc_free(ctx.phis);
 ralloc_free(ctx.vars);
+ if (ctx.abi->kill_ps_if_inf_interp)
+ ralloc_free(ctx.verified_interp);
 }
 
 bool
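The visit_ssa_undef() change above replaces an undef with zero when convert_undef_to_zero is set, unless is_def_used_in_an_export() finds that the value can reach a store_deref, either directly or through a vec4. The following is a tiny model of that use-chain walk; the `instr` record and its fixed-size `uses` array are hypothetical stand-ins for nir_instr and NIR's use lists, and only the shape of the recursion mirrors the hunk.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

enum op { OP_UNDEF, OP_STORE_DEREF, OP_VEC4, OP_FMUL };

struct instr {
   enum op op;
   const struct instr *uses[4]; /* consumers of this instruction's result */
};

/* Mirrors is_def_used_in_an_export(): true when some user is a store_deref,
 * or a vec4 whose own result reaches a store_deref. */
static bool reaches_export(const struct instr *def)
{
   for (int i = 0; i < 4 && def->uses[i]; i++) {
      const struct instr *user = def->uses[i];
      if (user->op == OP_STORE_DEREF)
         return true;
      if (user->op == OP_VEC4 && reaches_export(user))
         return true;
   }
   return false;
}

int main(void)
{
   struct instr store   = { OP_STORE_DEREF, { NULL } };
   struct instr vec     = { OP_VEC4,        { &store } };
   struct instr mul     = { OP_FMUL,        { NULL } };
   struct instr undef_a = { OP_UNDEF,       { &vec } }; /* undef -> vec4 -> store_deref */
   struct instr undef_b = { OP_UNDEF,       { &mul } }; /* undef only feeds an fmul     */

   printf("undef_a: %s\n", reaches_export(&undef_a) ? "keep undef" : "becomes zero");
   printf("undef_b: %s\n", reaches_export(&undef_b) ? "keep undef" : "becomes zero");
   return 0;
}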