X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fllvm%2Fac_nir_to_llvm.c;h=857c0f472f833cdfae2e301c27b5787916e00f73;hb=52f80f901ceac95794d2a1c58c12c3f5c8ce5726;hp=871c6abc17fb4c5b8afe4de521c84ad532dbcb0b;hpb=7ac8bb33cd6025f805a390e7647506e932f4db0d;p=mesa.git diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 871c6abc17f..857c0f472f8 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -51,6 +51,7 @@ struct ac_nir_context { struct hash_table *defs; struct hash_table *phis; struct hash_table *vars; + struct hash_table *verified_interp; LLVMValueRef main_function; LLVMBasicBlockRef continue_block; @@ -169,6 +170,17 @@ static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, LLVMIntPredicate pred, LLVMValueRef src0, LLVMValueRef src1) { + LLVMTypeRef src0_type = LLVMTypeOf(src0); + LLVMTypeRef src1_type = LLVMTypeOf(src1); + + if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind && + LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) { + src1 = LLVMBuildIntToPtr(ctx->builder, src1, src0_type, ""); + } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind && + LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) { + src0 = LLVMBuildIntToPtr(ctx->builder, src0, src1_type, ""); + } + LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, ""); return LLVMBuildSelect(ctx->builder, result, LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), @@ -193,30 +205,59 @@ static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, LLVMTypeRef result_type, LLVMValueRef src0) { - char name[64]; + char name[64], type[64]; LLVMValueRef params[] = { ac_to_float(ctx, src0), }; - ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, - ac_get_elem_bits(ctx, result_type)); + ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); assert(length < sizeof(name)); return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); } +static LLVMValueRef emit_intrin_1f_param_scalar(struct ac_llvm_context *ctx, + const char *intrin, + LLVMTypeRef result_type, + LLVMValueRef src0) +{ + if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind) + return emit_intrin_1f_param(ctx, intrin, result_type, src0); + + LLVMTypeRef elem_type = LLVMGetElementType(result_type); + LLVMValueRef ret = LLVMGetUndef(result_type); + + /* Scalarize the intrinsic, because vectors are not supported. 
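+	 *
+	 * For illustration (assumed IR, not taken from this patch): a v2f32
+	 * input to llvm.amdgcn.rcp expands into two scalar calls such as
+	 *   %r0 = call float @llvm.amdgcn.rcp.f32(float %x0)
+	 *   %r1 = call float @llvm.amdgcn.rcp.f32(float %x1)
+	 * whose results are re-inserted into the result vector below.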
*/ + for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) { + char name[64], type[64]; + LLVMValueRef params[] = { + ac_to_float(ctx, ac_llvm_extract_elem(ctx, src0, i)), + }; + + ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); + assert(length < sizeof(name)); + ret = LLVMBuildInsertElement(ctx->builder, ret, + ac_build_intrinsic(ctx, name, elem_type, params, + 1, AC_FUNC_ATTR_READNONE), + LLVMConstInt(ctx->i32, i, 0), ""); + } + return ret; +} + static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, const char *intrin, LLVMTypeRef result_type, LLVMValueRef src0, LLVMValueRef src1) { - char name[64]; + char name[64], type[64]; LLVMValueRef params[] = { ac_to_float(ctx, src0), ac_to_float(ctx, src1), }; - ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, - ac_get_elem_bits(ctx, result_type)); + ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); assert(length < sizeof(name)); return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); } @@ -226,15 +267,15 @@ static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, LLVMTypeRef result_type, LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) { - char name[64]; + char name[64], type[64]; LLVMValueRef params[] = { ac_to_float(ctx, src0), ac_to_float(ctx, src1), ac_to_float(ctx, src2), }; - ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, - ac_get_elem_bits(ctx, result_type)); + ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type)); + ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type); assert(length < sizeof(name)); return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE); } @@ -688,8 +729,18 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_frcp: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(src[0]), 1.0), src[0]); + /* For doubles, we need precise division to pass GLCTS. 
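+	 * The rcp intrinsic is only an approximation, so 64-bit sources get a
+	 * real IEEE 1.0/x division (the LLVMBuildFDiv below); all other cases
+	 * keep the fast llvm.amdgcn.rcp path.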
*/ + if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && + ac_get_type_size(def_type) == 8) { + result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1, + ac_to_float(&ctx->ac, src[0]), ""); + } else { + result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rcp", + ac_to_float_type(&ctx->ac, def_type), src[0]); + } + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); break; case nir_op_iand: result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); @@ -748,7 +799,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_feq32: result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]); break; - case nir_op_fne32: + case nir_op_fneu32: result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]); break; case nir_op_flt32: @@ -809,9 +860,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) ac_to_float_type(&ctx->ac, def_type),src[0]); break; case nir_op_ffract: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = ac_build_fract(&ctx->ac, src[0], - instr->dest.dest.ssa.bit_size); + result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract", + ac_to_float_type(&ctx->ac, def_type), src[0]); break; case nir_op_fsin: result = emit_intrin_1f_param(&ctx->ac, "llvm.sin", @@ -834,9 +884,11 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) ac_to_float_type(&ctx->ac, def_type), src[0]); break; case nir_op_frsq: - result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", - ac_to_float_type(&ctx->ac, def_type), src[0]); - result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result); + result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq", + ac_to_float_type(&ctx->ac, def_type), src[0]); + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); break; case nir_op_frexp_exp: src[0] = ac_to_float(&ctx->ac, src[0]); @@ -878,7 +930,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_ffma: /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */ result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); + ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); break; case nir_op_ldexp: src[0] = ac_to_float(&ctx->ac, src[0]); @@ -939,15 +991,45 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); break; case nir_op_f2f16_rtz: + case nir_op_f2f16: + case nir_op_f2fmp: src[0] = ac_to_float(&ctx->ac, src[0]); - if (LLVMTypeOf(src[0]) == ctx->ac.f64) - src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, ""); - LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 }; - result = ac_build_cvt_pkrtz_f16(&ctx->ac, param); - result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); + + /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it, + * all f32->f16 conversions have to round towards zero, because both scalar + * and vec2 down-conversions have to round equally. 
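+	 *
+	 * The scalar case below reuses the same packing instruction: it packs
+	 * the value together with an undef second lane and extracts lane 0,
+	 * roughly: packed = cvt_pkrtz(x, undef); result = packed[0];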
+ */ + if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL || + instr->op == nir_op_f2f16_rtz) { + src[0] = ac_to_float(&ctx->ac, src[0]); + + if (LLVMTypeOf(src[0]) == ctx->ac.f64) + src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, ""); + + /* Fast path conversion. This only works if NIR is vectorized + * to vec2 16. + */ + if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) { + LLVMValueRef args[] = { + ac_llvm_extract_elem(&ctx->ac, src[0], 0), + ac_llvm_extract_elem(&ctx->ac, src[0], 1), + }; + result = ac_build_cvt_pkrtz_f16(&ctx->ac, args); + break; + } + + assert(ac_get_llvm_num_components(src[0]) == 1); + LLVMValueRef param[2] = { src[0], LLVMGetUndef(ctx->ac.f32) }; + result = ac_build_cvt_pkrtz_f16(&ctx->ac, param); + result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); + } else { + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) + result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + else + result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + } break; case nir_op_f2f16_rtne: - case nir_op_f2f16: case nir_op_f2f32: case nir_op_f2f64: src[0] = ac_to_float(&ctx->ac, src[0]); @@ -958,6 +1040,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) break; case nir_op_u2u8: case nir_op_u2u16: + case nir_op_u2ump: case nir_op_u2u32: case nir_op_u2u64: if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) @@ -967,6 +1050,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) break; case nir_op_i2i8: case nir_op_i2i16: + case nir_op_i2imp: case nir_op_i2i32: case nir_op_i2i64: if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) @@ -1120,57 +1204,6 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) break; } - case nir_op_fmin3: - result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); - result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", - ac_to_float_type(&ctx->ac, def_type), result, src[2]); - break; - case nir_op_umin3: - result = ac_build_umin(&ctx->ac, src[0], src[1]); - result = ac_build_umin(&ctx->ac, result, src[2]); - break; - case nir_op_imin3: - result = ac_build_imin(&ctx->ac, src[0], src[1]); - result = ac_build_imin(&ctx->ac, result, src[2]); - break; - case nir_op_fmax3: - result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); - result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", - ac_to_float_type(&ctx->ac, def_type), result, src[2]); - break; - case nir_op_umax3: - result = ac_build_umax(&ctx->ac, src[0], src[1]); - result = ac_build_umax(&ctx->ac, result, src[2]); - break; - case nir_op_imax3: - result = ac_build_imax(&ctx->ac, src[0], src[1]); - result = ac_build_imax(&ctx->ac, result, src[2]); - break; - case nir_op_fmed3: { - src[0] = ac_to_float(&ctx->ac, src[0]); - src[1] = ac_to_float(&ctx->ac, src[1]); - src[2] = ac_to_float(&ctx->ac, src[2]); - result = ac_build_fmed3(&ctx->ac, src[0], src[1], src[2], - instr->dest.dest.ssa.bit_size); - break; - } - case nir_op_imed3: { - LLVMValueRef tmp1 = ac_build_imin(&ctx->ac, src[0], src[1]); - LLVMValueRef tmp2 = ac_build_imax(&ctx->ac, src[0], src[1]); - tmp2 = ac_build_imin(&ctx->ac, tmp2, src[2]); - result = ac_build_imax(&ctx->ac, tmp1, tmp2); - break; - } - case nir_op_umed3: { - 
LLVMValueRef tmp1 = ac_build_umin(&ctx->ac, src[0], src[1]); - LLVMValueRef tmp2 = ac_build_umax(&ctx->ac, src[0], src[1]); - tmp2 = ac_build_umin(&ctx->ac, tmp2, src[2]); - result = ac_build_umax(&ctx->ac, tmp1, tmp2); - break; - } - default: fprintf(stderr, "Unknown NIR alu instr: "); nir_print_instr(&instr->instr, stderr); @@ -1429,12 +1462,14 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); + assert(instr->dest.is_ssa); return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0, util_last_bit(mask), - 0, true); + 0, true, + instr->dest.ssa.bit_size == 16); } args->opcode = ac_image_sample; @@ -1463,7 +1498,8 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, break; case nir_texop_tg4: args->opcode = ac_image_gather4; - args->level_zero = true; + if (!args->lod && !args->bias) + args->level_zero = true; break; case nir_texop_lod: args->opcode = ac_image_get_lod; @@ -1566,13 +1602,13 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, if (instr->dest.ssa.bit_size == 8) { unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1; - LLVMTypeRef vec_type = LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords); + LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords); ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type); LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, ""); LLVMValueRef params[3]; if (load_dwords > 1) { - LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), ""); + LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, ""); params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), ""); params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), ""); } else { @@ -1585,11 +1621,11 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), ""); if (instr->dest.ssa.num_components > 1) - res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), instr->dest.ssa.num_components), ""); + res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), ""); return res; } else if (instr->dest.ssa.bit_size == 16) { unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1; - LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords); + LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords); ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type); LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, ""); res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, ""); @@ -1673,7 +1709,7 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx, } if (access & ACCESS_STREAM_CACHE_POLICY) - cache_policy |= ac_slc; + cache_policy |= ac_slc | ac_glc; return cache_policy; } @@ -1741,6 +1777,16 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, count = 1; num_bytes = 2; } + + /* Due to alignment issues, split stores of 8-bit/16-bit + * vectors. 
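+	 *
+	 * Worked example: for count = 4 at start = 0, the statement below sets
+	 * writemask |= ((1 << 3) - 1) << 1 (re-arming bits 1..3) and stores
+	 * only element 0 this iteration; the surrounding loop then picks up
+	 * the re-armed bits and emits the remaining elements one at a time.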
+ */ + if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) { + writemask |= ((1u << (count - 1)) - 1u) << (start + 1); + count = 1; + num_bytes = elem_size_bytes; + } + data = extract_vector_range(&ctx->ac, base_data, start, count); offset = LLVMBuildAdd(ctx->ac.builder, base_offset, @@ -2244,6 +2290,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, switch (mode) { case nir_var_shader_in: + /* TODO: remove this after RADV switches to lowered IO */ if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) { return load_tess_varyings(ctx, instr, true); @@ -2299,6 +2346,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, } break; case nir_var_shader_out: + /* TODO: remove this after RADV switches to lowered IO */ if (ctx->stage == MESA_SHADER_TESS_CTRL) { return load_tess_varyings(ctx, instr, false); } @@ -2329,20 +2377,28 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, break; case nir_var_mem_global: { LLVMValueRef address = get_src(ctx, instr->src[0]); + LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa); unsigned explicit_stride = glsl_get_explicit_stride(deref->type); unsigned natural_stride = type_scalar_size_bytes(deref->type); unsigned stride = explicit_stride ? explicit_stride : natural_stride; + int elem_size_bytes = ac_get_elem_bits(&ctx->ac, result_type) / 8; + bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4; - LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa); - if (stride != natural_stride) { - LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(result_type), - LLVMGetPointerAddressSpace(LLVMTypeOf(address))); + if (stride != natural_stride || split_loads) { + if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind) + result_type = LLVMGetElementType(result_type); + + LLVMTypeRef ptr_type = LLVMPointerType(result_type, + LLVMGetPointerAddressSpace(LLVMTypeOf(address))); address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) { LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0); values[i] = LLVMBuildLoad(ctx->ac.builder, ac_build_gep_ptr(&ctx->ac, address, offset), ""); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic); } return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components); } else { @@ -2350,6 +2406,9 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, LLVMGetPointerAddressSpace(LLVMTypeOf(address))); address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, ""); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic); return val; } } @@ -2408,7 +2467,7 @@ visit_store_var(struct ac_nir_context *ctx, switch (deref->mode) { case nir_var_shader_out: - + /* TODO: remove this after RADV switches to lowered IO */ if (ctx->stage == MESA_SHADER_TESS_CTRL) { LLVMValueRef vertex_index = NULL; LLVMValueRef indir_index = NULL; @@ -2423,7 +2482,9 @@ visit_store_var(struct ac_nir_context *ctx, ctx->abi->store_tcs_outputs(ctx->abi, var, vertex_index, indir_index, - const_index, src, writemask); + const_index, src, writemask, + var->data.location_frac, + var->data.driver_location); break; } @@ -2490,23 +2551,32 @@ visit_store_var(struct ac_nir_context *ctx, unsigned explicit_stride = 
glsl_get_explicit_stride(deref->type); unsigned natural_stride = type_scalar_size_bytes(deref->type); unsigned stride = explicit_stride ? explicit_stride : natural_stride; + int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(val)) / 8; + bool split_stores = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4; LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address))); address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 && - stride == natural_stride) { - LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val), - LLVMGetPointerAddressSpace(LLVMTypeOf(address))); + stride == natural_stride && !split_stores) { + LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val), + LLVMGetPointerAddressSpace(LLVMTypeOf(address))); address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); val = LLVMBuildBitCast(ctx->ac.builder, val, LLVMGetElementType(LLVMTypeOf(address)), ""); - LLVMBuildStore(ctx->ac.builder, val, address); + LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, val, address); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic); } else { - LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(LLVMTypeOf(val)), - LLVMGetPointerAddressSpace(LLVMTypeOf(address))); + LLVMTypeRef val_type = LLVMTypeOf(val); + if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind) + val_type = LLVMGetElementType(val_type); + + LLVMTypeRef ptr_type = LLVMPointerType(val_type, + LLVMGetPointerAddressSpace(LLVMTypeOf(address))); address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); for (unsigned chan = 0; chan < 4; chan++) { if (!(writemask & (1 << chan))) @@ -2519,7 +2589,10 @@ visit_store_var(struct ac_nir_context *ctx, chan); src = LLVMBuildBitCast(ctx->ac.builder, src, LLVMGetElementType(LLVMTypeOf(ptr)), ""); - LLVMBuildStore(ctx->ac.builder, src, ptr); + LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, src, ptr); + + if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) + LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic); } } break; @@ -2533,6 +2606,71 @@ visit_store_var(struct ac_nir_context *ctx, ac_build_endif(&ctx->ac, 7002); } +static void +visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) +{ + if (ctx->ac.postponed_kill) { + LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, + ctx->ac.postponed_kill, ""); + ac_build_ifcc(&ctx->ac, cond, 7002); + } + + unsigned base = nir_intrinsic_base(instr); + unsigned writemask = nir_intrinsic_write_mask(instr); + unsigned component = nir_intrinsic_component(instr); + LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); + nir_src offset = *nir_get_io_offset_src(instr); + LLVMValueRef indir_index = NULL; + + if (nir_src_is_const(offset)) + assert(nir_src_as_uint(offset) == 0); + else + indir_index = get_src(ctx, offset); + + switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) { + case 32: + break; + case 64: + writemask = widen_mask(writemask, 2); + src = LLVMBuildBitCast(ctx->ac.builder, src, + LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2), + ""); + break; + default: + unreachable("unhandled store_output bit size"); + return; + } + + writemask <<= component; + + if (ctx->stage == MESA_SHADER_TESS_CTRL) { + nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr); + LLVMValueRef vertex_index = + vertex_index_src ? 
get_src(ctx, *vertex_index_src) : NULL; + + ctx->abi->store_tcs_outputs(ctx->abi, NULL, + vertex_index, indir_index, + 0, src, writemask, + component, base * 4); + return; + } + + /* No indirect indexing is allowed after this point. */ + assert(!indir_index); + + for (unsigned chan = 0; chan < 8; chan++) { + if (!(writemask & (1 << chan))) + continue; + + LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); + LLVMBuildStore(ctx->ac.builder, value, + ctx->abi->outputs[base * 4 + chan]); + } + + if (ctx->ac.postponed_kill) + ac_build_endif(&ctx->ac, 7002); +} + static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array) { switch (dim) { @@ -2722,18 +2860,17 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, LLVMValueRef res; enum glsl_sampler_dim dim; - enum gl_access_qualifier access; + enum gl_access_qualifier access = nir_intrinsic_access(instr); bool is_array; if (bindless) { dim = nir_intrinsic_image_dim(instr); - access = nir_intrinsic_access(instr); is_array = nir_intrinsic_image_array(instr); } else { const nir_deref_instr *image_deref = get_image_deref(instr); const struct glsl_type *type = image_deref->type; const nir_variable *var = nir_deref_instr_get_variable(image_deref); dim = glsl_get_sampler_dim(type); - access = var->data.access; + access |= var->data.access; is_array = glsl_sampler_type_is_array(type); } @@ -2753,11 +2890,13 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); + assert(instr->dest.is_ssa); bool can_speculate = access & ACCESS_CAN_REORDER; res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels, args.cache_policy, - can_speculate); + can_speculate, + instr->dest.ssa.bit_size == 16); res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); @@ -2774,6 +2913,9 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, args.dmask = 15; args.attributes = AC_FUNC_ATTR_READONLY; + assert(instr->dest.is_ssa); + args.d16 = instr->dest.ssa.bit_size == 16; + res = ac_build_image_opcode(&ctx->ac, &args); } return exit_waterfall(ctx, &wctx, res); @@ -2790,19 +2932,18 @@ static void visit_image_store(struct ac_nir_context *ctx, } enum glsl_sampler_dim dim; - enum gl_access_qualifier access; + enum gl_access_qualifier access = nir_intrinsic_access(instr); bool is_array; if (bindless) { dim = nir_intrinsic_image_dim(instr); - access = nir_intrinsic_access(instr); is_array = nir_intrinsic_image_array(instr); } else { const nir_deref_instr *image_deref = get_image_deref(instr); const struct glsl_type *type = image_deref->type; const nir_variable *var = nir_deref_instr_get_variable(image_deref); dim = glsl_get_sampler_dim(type); - access = var->data.access; + access |= var->data.access; is_array = glsl_sampler_type_is_array(type); } @@ -2828,8 +2969,7 @@ static void visit_image_store(struct ac_nir_context *ctx, ctx->ac.i32_0, ""); ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, - ctx->ac.i32_0, src_channels, - args.cache_policy); + ctx->ac.i32_0, args.cache_policy); } else { bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0; @@ -2841,6 +2981,7 @@ static void visit_image_store(struct ac_nir_context *ctx, if (!level_zero) args.lod = get_src(ctx, instr->src[4]); args.dmask = 15; + args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16; 
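+	/* Unlike image loads, which key d16 off instr->dest.ssa.bit_size,
+	 * stores derive it from the payload type computed above. */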
ac_build_image_opcode(&ctx->ac, &args); } @@ -2946,16 +3087,6 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, case nir_intrinsic_image_deref_atomic_inc_wrap: { atomic_name = "inc"; atomic_subop = ac_atomic_inc_wrap; - /* ATOMIC_INC instruction does: - * value = (value + 1) % (data + 1) - * but we want: - * value = (value + 1) % data - * So replace 'data' by 'data - 1'. - */ - ctx->ssa_defs[instr->src[3].ssa->index] = - LLVMBuildSub(ctx->ac.builder, - ctx->ssa_defs[instr->src[3].ssa->index], - ctx->ac.i32_1, ""); break; } case nir_intrinsic_bindless_image_atomic_dec_wrap: @@ -3058,6 +3189,7 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, args.dmask = 0xf; args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false); args.opcode = ac_image_get_resinfo; + assert(nir_src_as_uint(instr->src[1]) == 0); args.lod = ctx->ac.i32_0; args.attributes = AC_FUNC_ATTR_READNONE; @@ -3328,11 +3460,26 @@ static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, case nir_intrinsic_deref_atomic_exchange: op = LLVMAtomicRMWBinOpXchg; break; +#if LLVM_VERSION_MAJOR >= 10 + case nir_intrinsic_shared_atomic_fadd: + case nir_intrinsic_deref_atomic_fadd: + op = LLVMAtomicRMWBinOpFAdd; + break; +#endif default: return NULL; } - result = ac_build_atomic_rmw(&ctx->ac, op, ptr, ac_to_integer(&ctx->ac, src), sync_scope); + LLVMValueRef val; + + if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd || + instr->intrinsic == nir_intrinsic_deref_atomic_fadd) { + val = ac_to_float(&ctx->ac, src); + } else { + val = ac_to_integer(&ctx->ac, src); + } + + result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope); } if (ctx->ac.postponed_kill) @@ -3484,13 +3631,26 @@ static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx, unsigned bitsize) { LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false); + LLVMValueRef interp_param_f; - interp_param = LLVMBuildBitCast(ctx->ac.builder, + interp_param_f = LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2f32, ""); LLVMValueRef i = LLVMBuildExtractElement( - ctx->ac.builder, interp_param, ctx->ac.i32_0, ""); + ctx->ac.builder, interp_param_f, ctx->ac.i32_0, ""); LLVMValueRef j = LLVMBuildExtractElement( - ctx->ac.builder, interp_param, ctx->ac.i32_1, ""); + ctx->ac.builder, interp_param_f, ctx->ac.i32_1, ""); + + /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */ + if (ctx->verified_interp && + !_mesa_hash_table_search(ctx->verified_interp, interp_param)) { + LLVMValueRef args[2]; + args[0] = i; + args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false); + LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1, + args, 2, AC_FUNC_ATTR_READNONE); + ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, "")); + _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param); + } LLVMValueRef values[4]; assert(bitsize == 16 || bitsize == 32); @@ -3508,18 +3668,82 @@ static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx, return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components)); } -static LLVMValueRef load_input(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) +static LLVMValueRef visit_load(struct ac_nir_context *ctx, + nir_intrinsic_instr *instr, bool is_output) { - unsigned offset_idx = instr->intrinsic == nir_intrinsic_load_input ? 
0 : 1; + LLVMValueRef values[8]; + LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa); + LLVMTypeRef component_type; + unsigned base = nir_intrinsic_base(instr); + unsigned component = nir_intrinsic_component(instr); + unsigned count = instr->dest.ssa.num_components * + (instr->dest.ssa.bit_size == 64 ? 2 : 1); + nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr); + LLVMValueRef vertex_index = + vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL; + nir_src offset = *nir_get_io_offset_src(instr); + LLVMValueRef indir_index = NULL; - /* We only lower inputs for fragment shaders ATM */ - ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[offset_idx]); - assert(offset); - assert(offset[0].i32 == 0); + if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind) + component_type = LLVMGetElementType(dest_type); + else + component_type = dest_type; - unsigned component = nir_intrinsic_component(instr); - unsigned index = nir_intrinsic_base(instr); + if (nir_src_is_const(offset)) + assert(nir_src_as_uint(offset) == 0); + else + indir_index = get_src(ctx, offset); + + if (ctx->stage == MESA_SHADER_TESS_CTRL || + (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) { + LLVMValueRef result = + ctx->abi->load_tess_varyings(ctx->abi, component_type, + vertex_index, indir_index, + 0, 0, base * 4, + component, + instr->num_components, + false, false, !is_output); + if (instr->dest.ssa.bit_size == 16) { + result = ac_to_integer(&ctx->ac, result); + result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, ""); + } + return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, ""); + } + + /* No indirect indexing is allowed after this point. */ + assert(!indir_index); + + if (ctx->stage == MESA_SHADER_GEOMETRY) { + LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); + assert(nir_src_is_const(*vertex_index_src)); + + return ctx->abi->load_inputs(ctx->abi, 0, base * 4, component, + instr->num_components, + nir_src_as_uint(*vertex_index_src), + 0, type); + } + + if (ctx->stage == MESA_SHADER_FRAGMENT && is_output && + nir_intrinsic_io_semantics(instr).fb_fetch_output) + return ctx->abi->emit_fbfetch(ctx->abi); + + /* Other non-fragment cases have inputs and outputs in temporaries. */ + if (ctx->stage != MESA_SHADER_FRAGMENT) { + for (unsigned chan = component; chan < count + component; chan++) { + if (is_output) { + values[chan] = LLVMBuildLoad(ctx->ac.builder, + ctx->abi->outputs[base * 4 + chan], ""); + } else { + values[chan] = ctx->abi->inputs[base * 4 + chan]; + if (!values[chan]) + values[chan] = LLVMGetUndef(ctx->ac.i32); + } + } + LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component); + return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, ""); + } + + /* Fragment shader inputs. */ unsigned vertex_id = 2; /* P0 */ if (instr->intrinsic == nir_intrinsic_load_input_vertex) { @@ -3540,18 +3764,11 @@ static LLVMValueRef load_input(struct ac_nir_context *ctx, } } - LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false); - LLVMValueRef values[8]; + LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false); - /* Each component of a 64-bit value takes up two GL-level channels. */ - unsigned num_components = instr->dest.ssa.num_components; - unsigned bit_size = instr->dest.ssa.bit_size; - unsigned channels = - bit_size == 64 ? 
num_components * 2 : num_components; - - for (unsigned chan = 0; chan < channels; chan++) { + for (unsigned chan = 0; chan < count; chan++) { if (component + chan > 4) - attr_number = LLVMConstInt(ctx->ac.i32, index + 1, false); + attr_number = LLVMConstInt(ctx->ac.i32, base + 1, false); LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false); values[chan] = ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, vertex_id, false), @@ -3560,16 +3777,12 @@ static LLVMValueRef load_input(struct ac_nir_context *ctx, ac_get_arg(&ctx->ac, ctx->args->prim_mask)); values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, ""); values[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan], - bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, ""); + instr->dest.ssa.bit_size == 16 ? ctx->ac.i16 + : ctx->ac.i32, ""); } - LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, channels); - if (bit_size == 64) { - LLVMTypeRef type = num_components == 1 ? ctx->ac.i64 : - LLVMVectorType(ctx->ac.i64, num_components); - result = LLVMBuildBitCast(ctx->ac.builder, result, type, ""); - } - return result; + LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, count); + return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, ""); } static void visit_intrinsic(struct ac_nir_context *ctx, @@ -3766,6 +3979,19 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_store_deref: visit_store_var(ctx, instr); break; + case nir_intrinsic_load_input: + case nir_intrinsic_load_input_vertex: + case nir_intrinsic_load_per_vertex_input: + result = visit_load(ctx, instr, false); + break; + case nir_intrinsic_load_output: + case nir_intrinsic_load_per_vertex_output: + result = visit_load(ctx, instr, true); + break; + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: + visit_store_output(ctx, instr); + break; case nir_intrinsic_load_shared: result = visit_load_shared(ctx, instr); break; @@ -3823,7 +4049,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx, result = visit_image_size(ctx, instr, false); break; case nir_intrinsic_shader_clock: - result = ac_build_shader_clock(&ctx->ac); + result = ac_build_shader_clock(&ctx->ac, + nir_intrinsic_memory_scope(instr)); break; case nir_intrinsic_discard: case nir_intrinsic_discard_if: @@ -3840,6 +4067,25 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_memory_barrier_shared: emit_membar(&ctx->ac, instr); break; + case nir_intrinsic_scoped_barrier: { + assert(!(nir_intrinsic_memory_semantics(instr) & + (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE))); + + nir_variable_mode modes = nir_intrinsic_memory_modes(instr); + + unsigned wait_flags = 0; + if (modes & (nir_var_mem_global | nir_var_mem_ssbo)) + wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE; + if (modes & nir_var_mem_shared) + wait_flags |= AC_WAIT_LGKM; + + if (wait_flags) + ac_build_waitcnt(&ctx->ac, wait_flags); + + if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP) + ac_emit_barrier(&ctx->ac, ctx->stage); + break; + } case nir_intrinsic_memory_barrier_tcs_patch: break; case nir_intrinsic_control_barrier: @@ -3854,7 +4100,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_shared_atomic_or: case nir_intrinsic_shared_atomic_xor: case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: { + case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_shared_atomic_fadd: { LLVMValueRef ptr = 
get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size); result = visit_var_atomic(ctx, instr, ptr, 1); @@ -3869,7 +4116,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_deref_atomic_or: case nir_intrinsic_deref_atomic_xor: case nir_intrinsic_deref_atomic_exchange: - case nir_intrinsic_deref_atomic_comp_swap: { + case nir_intrinsic_deref_atomic_comp_swap: + case nir_intrinsic_deref_atomic_fadd: { LLVMValueRef ptr = get_src(ctx, instr->src[0]); result = visit_var_atomic(ctx, instr, ptr, 1); break; @@ -3911,14 +4159,19 @@ static void visit_intrinsic(struct ac_nir_context *ctx, instr->dest.ssa.bit_size); break; } - case nir_intrinsic_load_input: - case nir_intrinsic_load_input_vertex: - result = load_input(ctx, instr); - break; case nir_intrinsic_emit_vertex: ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs); break; + case nir_intrinsic_emit_vertex_with_counter: { + unsigned stream = nir_intrinsic_stream_id(instr); + LLVMValueRef next_vertex = get_src(ctx, instr->src[0]); + ctx->abi->emit_vertex_with_counter(ctx->abi, stream, + next_vertex, + ctx->abi->outputs); + break; + } case nir_intrinsic_end_primitive: + case nir_intrinsic_end_primitive_with_counter: ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr)); break; case nir_intrinsic_load_tess_coord: @@ -3952,7 +4205,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_shuffle: if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 || - (ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) { + (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) { result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1])); } else { @@ -4397,8 +4650,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) offset_src = i; break; case nir_tex_src_bias: - if (instr->op == nir_texop_txb) - args.bias = get_src(ctx, instr->src[i].src); + args.bias = get_src(ctx, instr->src[i].src); break; case nir_tex_src_lod: { if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0) @@ -4418,6 +4670,9 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) case nir_tex_src_ddy: ddy = get_src(ctx, instr->src[i].src); break; + case nir_tex_src_min_lod: + args.min_lod = get_src(ctx, instr->src[i].src); + break; case nir_tex_src_texture_offset: case nir_tex_src_sampler_offset: case nir_tex_src_plane: @@ -4433,6 +4688,8 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) if (instr->op == nir_texop_texture_samples) { LLVMValueRef res, samples, is_msaa; + LLVMValueRef default_sample; + res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, ""); samples = LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 3, false), ""); @@ -4449,8 +4706,27 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) LLVMConstInt(ctx->ac.i32, 0xf, false), ""); samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, samples, ""); + + if (ctx->abi->robust_buffer_access) { + LLVMValueRef dword1, is_null_descriptor; + + /* Extract the second dword of the descriptor, if it's + * all zero, then it's a null descriptor. 
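+	 *
+	 * With robust buffer access a null descriptor is expected to read as
+	 * zeros, so the sample-count query below returns 0 for it instead of
+	 * the usual fallback of 1:
+	 *   default_sample = is_null_descriptor ? 0 : 1;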
+ */ + dword1 = LLVMBuildExtractElement(ctx->ac.builder, res, + LLVMConstInt(ctx->ac.i32, 1, false), ""); + is_null_descriptor = + LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1, + LLVMConstInt(ctx->ac.i32, 0, false), ""); + default_sample = + LLVMBuildSelect(ctx->ac.builder, is_null_descriptor, + ctx->ac.i32_0, ctx->ac.i32_1, ""); + } else { + default_sample = ctx->ac.i32_1; + } + samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, - ctx->ac.i32_1, ""); + default_sample, ""); result = samples; goto write_result; } @@ -4658,6 +4934,9 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) } } + assert(instr->dest.is_ssa); + args.d16 = instr->dest.ssa.bit_size == 16; + result = build_tex_intrinsic(ctx, instr, &args); if (instr->op == nir_texop_query_levels) @@ -4728,19 +5007,46 @@ static void phi_post_pass(struct ac_nir_context *ctx) } +static bool is_def_used_in_an_export(const nir_ssa_def* def) { + nir_foreach_use(use_src, def) { + if (use_src->parent_instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr); + if (instr->intrinsic == nir_intrinsic_store_deref) + return true; + } else if (use_src->parent_instr->type == nir_instr_type_alu) { + nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr); + if (instr->op == nir_op_vec4 && + is_def_used_in_an_export(&instr->dest.dest.ssa)) { + return true; + } + } + } + return false; +} + static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_ssa_undef_instr *instr) { unsigned num_components = instr->def.num_components; LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); - LLVMValueRef undef; - if (num_components == 1) - undef = LLVMGetUndef(type); - else { - undef = LLVMGetUndef(LLVMVectorType(type, num_components)); + if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) { + LLVMValueRef undef; + + if (num_components == 1) + undef = LLVMGetUndef(type); + else { + undef = LLVMGetUndef(LLVMVectorType(type, num_components)); + } + ctx->ssa_defs[instr->def.index] = undef; + } else { + LLVMValueRef zero = LLVMConstInt(type, 0, false); + if (num_components > 1) { + zero = ac_build_gather_values_extended( + &ctx->ac, &zero, 4, 0, false, false); + } + ctx->ssa_defs[instr->def.index] = zero; } - ctx->ssa_defs[instr->def.index] = undef; } static void visit_jump(struct ac_llvm_context *ctx, @@ -4882,7 +5188,7 @@ static void visit_deref(struct ac_nir_context *ctx, break; case nir_deref_type_ptr_as_array: if (instr->mode == nir_var_mem_global) { - unsigned stride = nir_deref_instr_ptr_as_array_stride(instr); + unsigned stride = nir_deref_instr_array_stride(instr); LLVMValueRef index = get_src(ctx, instr->arr.index); if (LLVMTypeOf(index) != ctx->ac.i64) @@ -5085,7 +5391,7 @@ setup_locals(struct ac_nir_context *ctx, { int i, j; ctx->num_locals = 0; - nir_foreach_variable(variable, &func->impl->locals) { + nir_foreach_function_temp_variable(variable, func->impl) { unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); variable->data.driver_location = ctx->num_locals * 4; variable->data.location_frac = 0; @@ -5185,9 +5491,13 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); - nir_foreach_variable(variable, &nir->outputs) - ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable, - ctx.stage); + /* TODO: remove this after RADV switches to lowered IO */ + if 
(!nir->info.io_lowered) { + nir_foreach_shader_out_variable(variable, nir) { + ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable, + ctx.stage); + } + } ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -5196,6 +5506,10 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + if (ctx.abi->kill_ps_if_inf_interp) + ctx.verified_interp = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + func = (struct nir_function *)exec_list_get_head(&nir->functions); nir_index_ssa_defs(func->impl); @@ -5230,6 +5544,8 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, ralloc_free(ctx.defs); ralloc_free(ctx.phis); ralloc_free(ctx.vars); + if (ctx.abi->kill_ps_if_inf_interp) + ralloc_free(ctx.verified_interp); } bool @@ -5276,33 +5592,26 @@ ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) */ indirect_mask |= nir_var_function_temp; - progress |= nir_lower_indirect_derefs(nir, indirect_mask); + progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX); return progress; } static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin) { - if (intrin->intrinsic != nir_intrinsic_store_deref) - return 0; - - nir_variable *var = - nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0])); - - if (var->data.mode != nir_var_shader_out) + if (intrin->intrinsic != nir_intrinsic_store_output) return 0; - unsigned writemask = 0; - const int location = var->data.location; - unsigned first_component = var->data.location_frac; - unsigned num_comps = intrin->dest.ssa.num_components; + unsigned writemask = nir_intrinsic_write_mask(intrin) << + nir_intrinsic_component(intrin); + unsigned location = nir_intrinsic_io_semantics(intrin).location; - if (location == VARYING_SLOT_TESS_LEVEL_INNER) - writemask = ((1 << (num_comps + 1)) - 1) << first_component; - else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) - writemask = (((1 << (num_comps + 1)) - 1) << first_component) << 4; + if (location == VARYING_SLOT_TESS_LEVEL_OUTER) + return writemask << 4; + else if (location == VARYING_SLOT_TESS_LEVEL_INNER) + return writemask; - return writemask; + return 0; } static void