X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_nir_to_llvm.c;h=8dea35178b36827195ba46c2e246384aae4d3982;hb=9a45a190ad22849a492506389413046948e0b093;hp=10d1773850924971e3dc8f69cdca0743bde192ae;hpb=3d41757788aca774e64297bed962696cc0c9b262;p=mesa.git diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 10d17738509..8dea35178b3 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -270,8 +270,9 @@ static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, { LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, ctx->i32_0, ""); - return LLVMBuildSelect(ctx->builder, v, ac_to_integer(ctx, src1), - ac_to_integer(ctx, src2), ""); + return LLVMBuildSelect(ctx->builder, v, + ac_to_integer_or_pointer(ctx, src1), + ac_to_integer_or_pointer(ctx, src2), ""); } static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx, @@ -311,9 +312,18 @@ static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, } static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, - LLVMValueRef src0) + LLVMValueRef src0, + unsigned bitsize) { - return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), ""); + LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, + LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), + ""); + result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, ""); + + if (bitsize == 32) + return result; + + return LLVMBuildFPExt(ctx->builder, result, ctx->f64, ""); } static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, @@ -419,15 +429,15 @@ static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx, { LLVMValueRef result; - if (HAVE_LLVM < 0x0700) { + if (HAVE_LLVM >= 0x0800) { LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed); result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, ""); } else { - /* FIXME: LLVM 7 returns incorrect result when count is 0. + /* FIXME: LLVM 7+ returns incorrect result when count is 0. 
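* The workaround below builds explicit compares of the extract count
* against 32 and 0 and selects the final result for those cases.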
* https://bugs.freedesktop.org/show_bug.cgi?id=107276 */ - LLVMValueRef zero = LLVMConstInt(ctx->i32, 0, false); + LLVMValueRef zero = ctx->i32_0; LLVMValueRef icond1 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); LLVMValueRef icond2 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], zero, ""); @@ -478,14 +488,15 @@ static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx, comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, ""); comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, ""); - return ac_build_cvt_pkrtz_f16(ctx, comp); + return LLVMBuildBitCast(ctx->builder, ac_build_cvt_pkrtz_f16(ctx, comp), + ctx->i32, ""); } static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0) { LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); - LLVMValueRef temps[2], result, val; + LLVMValueRef temps[2], val; int i; for (i = 0; i < 2; i++) { @@ -494,12 +505,7 @@ static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, ""); temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, ""); } - - result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0], - ctx->i32_0, ""); - result = LLVMBuildInsertElement(ctx->builder, result, temps[1], - ctx->i32_1, ""); - return result; + return ac_build_gather_values(ctx, temps, 2); } static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, @@ -681,34 +687,34 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) LLVMTypeOf(src[0]), ""), ""); break; - case nir_op_ilt: + case nir_op_ilt32: result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]); break; - case nir_op_ine: + case nir_op_ine32: result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]); break; - case nir_op_ieq: + case nir_op_ieq32: result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]); break; - case nir_op_ige: + case nir_op_ige32: result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]); break; - case nir_op_ult: + case nir_op_ult32: result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]); break; - case nir_op_uge: + case nir_op_uge32: result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]); break; - case nir_op_feq: + case nir_op_feq32: result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]); break; - case nir_op_fne: + case nir_op_fne32: result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]); break; - case nir_op_flt: + case nir_op_flt32: result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]); break; - case nir_op_fge: + case nir_op_fge32: result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]); break; case nir_op_fabs: @@ -840,15 +846,10 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]); break; case nir_op_bitfield_reverse: - result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); + result = ac_build_bitfield_reverse(&ctx->ac, src[0]); break; case nir_op_bit_count: - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 32) - result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); - else { - result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i64", ctx->ac.i64, src, 1, AC_FUNC_ATTR_READNONE); - result = LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, ""); - } + result = ac_build_bit_count(&ctx->ac, src[0]); break; case 
nir_op_vec2: case nir_op_vec3: @@ -857,34 +858,47 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) src[i] = ac_to_integer(&ctx->ac, src[i]); result = ac_build_gather_values(&ctx->ac, src, num_components); break; + case nir_op_f2i16: case nir_op_f2i32: case nir_op_f2i64: src[0] = ac_to_float(&ctx->ac, src[0]); result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, ""); break; + case nir_op_f2u16: case nir_op_f2u32: case nir_op_f2u64: src[0] = ac_to_float(&ctx->ac, src[0]); result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, ""); break; + case nir_op_i2f16: case nir_op_i2f32: case nir_op_i2f64: src[0] = ac_to_integer(&ctx->ac, src[0]); result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); break; + case nir_op_u2f16: case nir_op_u2f32: case nir_op_u2f64: src[0] = ac_to_integer(&ctx->ac, src[0]); result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); break; - case nir_op_f2f64: + case nir_op_f2f16_rtz: src[0] = ac_to_float(&ctx->ac, src[0]); - result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 }; + result = ac_build_cvt_pkrtz_f16(&ctx->ac, param); + result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); break; + case nir_op_f2f16_rtne: + case nir_op_f2f16: case nir_op_f2f32: + case nir_op_f2f64: src[0] = ac_to_float(&ctx->ac, src[0]); - result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) + result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + else + result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); break; + case nir_op_u2u16: case nir_op_u2u32: case nir_op_u2u64: src[0] = ac_to_integer(&ctx->ac, src[0]); @@ -893,6 +907,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) else result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); break; + case nir_op_i2i16: case nir_op_i2i32: case nir_op_i2i64: src[0] = ac_to_integer(&ctx->ac, src[0]); @@ -901,7 +916,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) else result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); break; - case nir_op_bcsel: + case nir_op_b32csel: result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]); break; case nir_op_find_lsb: @@ -926,16 +941,20 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) src[1] = ac_to_integer(&ctx->ac, src[1]); result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]); break; - case nir_op_b2f: - result = emit_b2f(&ctx->ac, src[0]); + case nir_op_b2f16: + case nir_op_b2f32: + case nir_op_b2f64: + result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; - case nir_op_f2b: + case nir_op_f2b32: result = emit_f2b(&ctx->ac, src[0]); break; - case nir_op_b2i: + case nir_op_b2i16: + case nir_op_b2i32: + case nir_op_b2i64: result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; - case nir_op_i2b: + case nir_op_i2b32: src[0] = ac_to_integer(&ctx->ac, src[0]); result = emit_i2b(&ctx->ac, src[0]); break; @@ -989,10 +1008,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_pack_64_2x32_split: { LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32); - tmp = 
LLVMBuildInsertElement(ctx->ac.builder, tmp, - src[0], ctx->ac.i32_0, ""); - tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, - src[1], ctx->ac.i32_1, ""); + tmp = ac_build_gather_values(&ctx->ac, src, 2); result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, ""); break; } @@ -1084,7 +1100,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) if (result) { assert(instr->dest.dest.is_ssa); - result = ac_to_integer(&ctx->ac, result); + result = ac_to_integer_or_pointer(&ctx->ac, result); ctx->ssa_defs[instr->dest.dest.ssa.index] = result; } } @@ -1098,6 +1114,10 @@ static void visit_load_const(struct ac_nir_context *ctx, for (unsigned i = 0; i < instr->def.num_components; ++i) { switch (instr->def.bit_size) { + case 16: + values[i] = LLVMConstInt(element_type, + instr->value.u16[i], false); + break; case 32: values[i] = LLVMConstInt(element_type, instr->value.u32[i], false); @@ -1152,7 +1172,8 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, struct ac_image_args *args, const nir_tex_instr *instr) { - enum glsl_base_type stype = glsl_get_sampler_result_type(var->type); + const struct glsl_type *type = glsl_without_array(var->type); + enum glsl_base_type stype = glsl_get_sampler_result_type(type); LLVMValueRef half_texel[2]; LLVMValueRef compare_cube_wa = NULL; LLVMValueRef result; @@ -1337,7 +1358,8 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) { nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr); nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr); - enum glsl_base_type stype = glsl_get_sampler_result_type(var->type); + const struct glsl_type *type = glsl_without_array(var->type); + enum glsl_base_type stype = glsl_get_sampler_result_type(type); if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { return lower_gather4_integer(&ctx->ac, var, args, instr); } @@ -1376,6 +1398,26 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, get_src(ctx, instr->src[0]), ""); ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr); + + if (instr->dest.ssa.bit_size == 16) { + unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1; + LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords); + ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type); + LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, ""); + res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, ""); + LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, ""); + cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, ""); + LLVMValueRef mask[] = { LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), + LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), + LLVMConstInt(ctx->ac.i32, 4, false)}; + LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components); + LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components); + LLVMValueRef shuffle_aligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, ""); + LLVMValueRef shuffle_unaligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, ""); + res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, ""); + return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), ""); + } + ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa)); 
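	/* For 32- and 64-bit destinations the pointer was just cast to the
	 * destination type, so the value can be loaded directly. */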
return LLVMBuildLoad(ctx->ac.builder, ptr, ""); @@ -1401,31 +1443,24 @@ static uint32_t widen_mask(uint32_t mask, unsigned multiplier) static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned start, unsigned count) { - LLVMTypeRef type = LLVMTypeOf(src); + LLVMValueRef mask[] = { + ctx->i32_0, ctx->i32_1, + LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false) }; - if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) { + unsigned src_elements = ac_get_llvm_num_components(src); + + if (count == src_elements) { assert(start == 0); - assert(count == 1); return src; + } else if (count == 1) { + assert(start < src_elements); + return LLVMBuildExtractElement(ctx->builder, src, mask[start], ""); + } else { + assert(start + count <= src_elements); + assert(count <= 4); + LLVMValueRef swizzle = LLVMConstVector(&mask[start], count); + return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, ""); } - - unsigned src_elements = LLVMGetVectorSize(type); - assert(start < src_elements); - assert(start + count <= src_elements); - - if (start == 0 && count == src_elements) - return src; - - if (count == 1) - return LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, start, false), ""); - - assert(count <= 8); - LLVMValueRef indices[8]; - for (unsigned i = 0; i < count; ++i) - indices[i] = LLVMConstInt(ctx->i32, start + i, false); - - LLVMValueRef swizzle = LLVMConstVector(indices, count); - return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, ""); } static void visit_store_ssbo(struct ac_nir_context *ctx, @@ -1433,33 +1468,24 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, { const char *store_name; LLVMValueRef src_data = get_src(ctx, instr->src[0]); - LLVMTypeRef data_type = ctx->ac.f32; - int elem_size_mult = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32; - int components_32bit = elem_size_mult * instr->num_components; + int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8; unsigned writemask = nir_intrinsic_write_mask(instr); - LLVMValueRef base_data, base_offset; - LLVMValueRef params[6]; - - params[1] = ctx->abi->load_ssbo(ctx->abi, - get_src(ctx, instr->src[1]), true); - params[2] = ctx->ac.i32_0; /* vindex */ - params[4] = ctx->ac.i1false; /* glc */ - params[5] = ctx->ac.i1false; /* slc */ - - if (components_32bit > 1) - data_type = LLVMVectorType(ctx->ac.f32, components_32bit); + enum gl_access_qualifier access = nir_intrinsic_access(instr); + LLVMValueRef glc = ctx->ac.i1false; - writemask = widen_mask(writemask, elem_size_mult); + if (access & (ACCESS_VOLATILE | ACCESS_COHERENT)) + glc = ctx->ac.i1true; - base_data = ac_to_float(&ctx->ac, src_data); + LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, + get_src(ctx, instr->src[1]), true); + LLVMValueRef base_data = ac_to_float(&ctx->ac, src_data); base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components); - base_data = LLVMBuildBitCast(ctx->ac.builder, base_data, - data_type, ""); - base_offset = get_src(ctx, instr->src[2]); /* voffset */ + LLVMValueRef base_offset = get_src(ctx, instr->src[2]); + while (writemask) { int start, count; - LLVMValueRef data; - LLVMValueRef offset; + LLVMValueRef data, offset; + LLVMTypeRef data_type; u_bit_scan_consecutive_range(&writemask, &start, &count); @@ -1469,31 +1495,76 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, writemask |= 1 << (start + 2); count = 2; } + int num_bytes = count * elem_size_bytes; /* count in bytes */ - if (count > 4) { - writemask |= ((1u << 
(count - 4)) - 1u) << (start + 4); - count = 4; + /* we can only store 4 DWords at the same time. + * can only happen for 64 Bit vectors. */ + if (num_bytes > 16) { + writemask |= ((1u << (count - 2)) - 1u) << (start + 2); + count = 2; + num_bytes = 16; } - if (count == 4) { - store_name = "llvm.amdgcn.buffer.store.v4f32"; - } else if (count == 2) { - store_name = "llvm.amdgcn.buffer.store.v2f32"; - - } else { - assert(count == 1); - store_name = "llvm.amdgcn.buffer.store.f32"; + /* check alignment of 16 Bit stores */ + if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) { + writemask |= ((1u << (count - 1)) - 1u) << (start + 1); + count = 1; + num_bytes = 2; } data = extract_vector_range(&ctx->ac, base_data, start, count); - offset = base_offset; - if (start != 0) { - offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, start * 4, false), ""); + if (start == 0) { + offset = base_offset; + } else { + offset = LLVMBuildAdd(ctx->ac.builder, base_offset, + LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), ""); + } + if (num_bytes == 2) { + store_name = "llvm.amdgcn.tbuffer.store.i32"; + data_type = ctx->ac.i32; + LLVMValueRef tbuffer_params[] = { + data, + rsrc, + ctx->ac.i32_0, /* vindex */ + offset, /* voffset */ + ctx->ac.i32_0, + ctx->ac.i32_0, + LLVMConstInt(ctx->ac.i32, 2, false), // dfmt (= 16bit) + LLVMConstInt(ctx->ac.i32, 4, false), // nfmt (= uint) + glc, + ctx->ac.i1false, + }; + ac_build_intrinsic(&ctx->ac, store_name, + ctx->ac.voidt, tbuffer_params, 10, 0); + } else { + switch (num_bytes) { + case 16: /* v4f32 */ + store_name = "llvm.amdgcn.buffer.store.v4f32"; + data_type = ctx->ac.v4f32; + break; + case 8: /* v2f32 */ + store_name = "llvm.amdgcn.buffer.store.v2f32"; + data_type = ctx->ac.v2f32; + break; + case 4: /* f32 */ + store_name = "llvm.amdgcn.buffer.store.f32"; + data_type = ctx->ac.f32; + break; + default: + unreachable("Malformed vector store."); + } + data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, ""); + LLVMValueRef params[] = { + data, + rsrc, + ctx->ac.i32_0, /* vindex */ + offset, + glc, + ctx->ac.i1false, /* slc */ + }; + ac_build_intrinsic(&ctx->ac, store_name, + ctx->ac.voidt, params, 6, 0); } - params[0] = data; - params[3] = offset; - ac_build_intrinsic(&ctx->ac, store_name, - ctx->ac.voidt, params, 6, 0); } } @@ -1513,7 +1584,7 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, true); params[arg_count++] = ctx->ac.i32_0; /* vindex */ params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ - params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false); /* slc */ + params[arg_count++] = ctx->ac.i1false; /* slc */ switch (instr->intrinsic) { case nir_intrinsic_ssbo_atomic_add: @@ -1556,68 +1627,87 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { - LLVMValueRef results[2]; - int load_components; + int elem_size_bytes = instr->dest.ssa.bit_size / 8; int num_components = instr->num_components; - if (instr->dest.ssa.bit_size == 64) - num_components *= 2; + enum gl_access_qualifier access = nir_intrinsic_access(instr); + LLVMValueRef glc = ctx->ac.i1false; - for (int i = 0; i < num_components; i += load_components) { - load_components = MIN2(num_components - i, 4); - const char *load_name; - LLVMTypeRef data_type = ctx->ac.f32; - LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * 4, false); - offset = LLVMBuildAdd(ctx->ac.builder, get_src(ctx, instr->src[1]), offset, 
""); - - if (load_components == 3) - data_type = LLVMVectorType(ctx->ac.f32, 4); - else if (load_components > 1) - data_type = LLVMVectorType(ctx->ac.f32, load_components); - - if (load_components >= 3) - load_name = "llvm.amdgcn.buffer.load.v4f32"; - else if (load_components == 2) - load_name = "llvm.amdgcn.buffer.load.v2f32"; - else if (load_components == 1) - load_name = "llvm.amdgcn.buffer.load.f32"; - else - unreachable("unhandled number of components"); - - LLVMValueRef params[] = { - ctx->abi->load_ssbo(ctx->abi, - get_src(ctx, instr->src[0]), - false), - ctx->ac.i32_0, - offset, - ctx->ac.i1false, - ctx->ac.i1false, - }; + if (access & (ACCESS_VOLATILE | ACCESS_COHERENT)) + glc = ctx->ac.i1true; - results[i > 0 ? 1 : 0] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); - } + LLVMValueRef offset = get_src(ctx, instr->src[1]); + LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, + get_src(ctx, instr->src[0]), false); + LLVMValueRef vindex = ctx->ac.i32_0; + + LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa); + LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type; + + LLVMValueRef results[4]; + for (int i = 0; i < num_components;) { + int num_elems = num_components - i; + if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0) + num_elems = 1; + if (num_elems * elem_size_bytes > 16) + num_elems = 16 / elem_size_bytes; + int load_bytes = num_elems * elem_size_bytes; + + LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false); + + LLVMValueRef ret; + if (load_bytes == 2) { + ret = ac_build_tbuffer_load_short(&ctx->ac, + rsrc, + vindex, + offset, + ctx->ac.i32_0, + immoffset, + glc); + } else { + const char *load_name; + LLVMTypeRef data_type; + switch (load_bytes) { + case 16: + case 12: + load_name = "llvm.amdgcn.buffer.load.v4f32"; + data_type = ctx->ac.v4f32; + break; + case 8: + case 6: + load_name = "llvm.amdgcn.buffer.load.v2f32"; + data_type = ctx->ac.v2f32; + break; + case 4: + load_name = "llvm.amdgcn.buffer.load.f32"; + data_type = ctx->ac.f32; + break; + default: + unreachable("Malformed load buffer."); + } + LLVMValueRef params[] = { + rsrc, + vindex, + LLVMBuildAdd(ctx->ac.builder, offset, immoffset, ""), + glc, + ctx->ac.i1false, + }; + ret = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); + } - assume(results[0]); - LLVMValueRef ret = results[0]; - if (num_components > 4 || num_components == 3) { - LLVMValueRef masks[] = { - LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), - LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), - LLVMConstInt(ctx->ac.i32, 4, false), LLVMConstInt(ctx->ac.i32, 5, false), - LLVMConstInt(ctx->ac.i32, 6, false), LLVMConstInt(ctx->ac.i32, 7, false) - }; + LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret))); + ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, ""); + ret = ac_trim_vector(&ctx->ac, ret, load_bytes); - if (num_components == 6) { - /* we end up with a v4f32 and v2f32 but shuffle fails on that */ - results[1] = ac_build_expand_to_vec4(&ctx->ac, results[1], 4); - } + LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems); + ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, ""); - LLVMValueRef swizzle = LLVMConstVector(masks, num_components); - ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0], - results[num_components > 4 ? 
1 : 0], swizzle, ""); + for (unsigned j = 0; j < num_elems; j++) { + results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), ""); + } + i += num_elems; } - return LLVMBuildBitCast(ctx->ac.builder, ret, - get_def_type(ctx, &instr->dest.ssa), ""); + return ac_build_gather_values(&ctx->ac, results, num_components); } static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, @@ -1634,9 +1724,25 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, if (instr->dest.ssa.bit_size == 64) num_components *= 2; - ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, - NULL, 0, false, false, true, true); - ret = ac_trim_vector(&ctx->ac, ret, num_components); + if (instr->dest.ssa.bit_size == 16) { + LLVMValueRef results[num_components]; + for (unsigned i = 0; i < num_components; ++i) { + results[i] = ac_build_tbuffer_load_short(&ctx->ac, + rsrc, + ctx->ac.i32_0, + offset, + ctx->ac.i32_0, + LLVMConstInt(ctx->ac.i32, 2 * i, 0), + ctx->ac.i1false); + } + ret = ac_build_gather_values(&ctx->ac, results, num_components); + } else { + ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, + NULL, 0, false, false, true, true); + + ret = ac_trim_vector(&ctx->ac, ret, num_components); + } + return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); } @@ -1744,6 +1850,10 @@ static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx, var->data.location_frac, instr->num_components, is_patch, is_compact, load_inputs); + if (instr->dest.ssa.bit_size == 16) { + result = ac_to_integer(&ctx->ac, result); + result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, ""); + } return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, ""); } @@ -1753,23 +1863,32 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); LLVMValueRef values[8]; - int idx = var->data.driver_location; + int idx = 0; int ve = instr->dest.ssa.num_components; - unsigned comp = var->data.location_frac; + unsigned comp = 0; LLVMValueRef indir_index; LLVMValueRef ret; unsigned const_index; - unsigned stride = var->data.compact ? 
1 : 4; - bool vs_in = ctx->stage == MESA_SHADER_VERTEX && - var->data.mode == nir_var_shader_in; - - get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), vs_in, NULL, NULL, - &const_index, &indir_index); + unsigned stride = 4; + int mode = nir_var_shared; + + if (var) { + bool vs_in = ctx->stage == MESA_SHADER_VERTEX && + var->data.mode == nir_var_shader_in; + if (var->data.compact) + stride = 1; + idx = var->data.driver_location; + comp = var->data.location_frac; + mode = var->data.mode; + + get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), vs_in, NULL, NULL, + &const_index, &indir_index); + } if (instr->dest.ssa.bit_size == 64) ve *= 2; - switch (var->data.mode) { + switch (mode) { case nir_var_shader_in: if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) { @@ -2104,7 +2223,7 @@ static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx, enum ac_descriptor_type desc_type, bool write) { - return get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), desc_type, NULL, true, true); + return get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), desc_type, NULL, true, write); } static void get_image_coords(struct ac_nir_context *ctx, @@ -2130,7 +2249,7 @@ static void get_image_coords(struct ac_nir_context *ctx, bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D; count = image_type_to_components_count(dim, is_array); - if (is_ms) { + if (is_ms && instr->intrinsic == nir_intrinsic_image_deref_load) { LLVMValueRef fmask_load_address[3]; int chan; @@ -2153,7 +2272,8 @@ static void get_image_coords(struct ac_nir_context *ctx, fmask_load_address[1], fmask_load_address[2], sample_index, - get_image_descriptor(ctx, instr, AC_DESC_FMASK, false)); + get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), + AC_DESC_FMASK, NULL, false, false)); } if (count == 1 && !gfx9_1d) { if (instr->src[1].ssa->num_components) @@ -2251,7 +2371,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, glsl_sampler_type_is_array(type)); args.dmask = 15; args.attributes = AC_FUNC_ATTR_READONLY; - if (var->data.image._volatile || var->data.image.coherent) + if (var->data.image.access & (ACCESS_VOLATILE | ACCESS_COHERENT)) args.cache_policy |= ac_glc; res = ac_build_image_opcode(&ctx->ac, &args); @@ -2272,17 +2392,33 @@ static void visit_image_store(struct ac_nir_context *ctx, glc = ctx->ac.i1true; if (dim == GLSL_SAMPLER_DIM_BUF) { + char name[48]; + const char *types[] = { "f32", "v2f32", "v4f32" }; LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true); + LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); + unsigned src_channels = ac_get_llvm_num_components(src); + + if (src_channels == 3) + src = ac_build_expand(&ctx->ac, src, 3, 4); - params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); /* data */ + params[0] = src; /* data */ params[1] = rsrc; params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); /* vindex */ params[3] = ctx->ac.i32_0; /* voffset */ - params[4] = glc; /* glc */ - params[5] = ctx->ac.i1false; /* slc */ - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, - params, 6, 0); + snprintf(name, sizeof(name), "%s.%s", + HAVE_LLVM >= 0x800 ? 
"llvm.amdgcn.struct.buffer.store.format" + : "llvm.amdgcn.buffer.store.format", + types[CLAMP(src_channels, 1, 3) - 1]); + + if (HAVE_LLVM >= 0x800) { + params[4] = ctx->ac.i32_0; /* soffset */ + params[5] = glc ? ctx->ac.i32_1 : ctx->ac.i32_0; + } else { + params[4] = glc; /* glc */ + params[5] = ctx->ac.i1false; /* slc */ + } + ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6, 0); } else { struct ac_image_args args = {}; args.opcode = ac_image_store; @@ -2292,7 +2428,7 @@ static void visit_image_store(struct ac_nir_context *ctx, args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type)); args.dmask = 15; - if (force_glc || var->data.image._volatile || var->data.image.coherent) + if (force_glc || (var->data.image.access & (ACCESS_VOLATILE | ACCESS_COHERENT))) args.cache_policy |= ac_glc; ac_build_image_opcode(&ctx->ac, &args); @@ -2309,7 +2445,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap; const char *atomic_name; - char intrinsic_name[41]; + char intrinsic_name[64]; enum ac_atomic_op atomic_subop; const struct glsl_type *type = glsl_without_array(var->type); MAYBE_UNUSED int length; @@ -2362,10 +2498,18 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); /* vindex */ params[param_count++] = ctx->ac.i32_0; /* voffset */ - params[param_count++] = ctx->ac.i1false; /* slc */ + if (HAVE_LLVM >= 0x800) { + params[param_count++] = ctx->ac.i32_0; /* soffset */ + params[param_count++] = ctx->ac.i32_0; /* slc */ - length = snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.buffer.atomic.%s", atomic_name); + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name); + } else { + params[param_count++] = ctx->ac.i1false; /* slc */ + + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.buffer.atomic.%s", atomic_name); + } assert(length < sizeof(intrinsic_name)); return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, @@ -2446,10 +2590,6 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, return res; } -#define NOOP_WAITCNT 0xf7f -#define LGKM_CNT 0x07f -#define VM_CNT 0xf70 - static void emit_membar(struct ac_llvm_context *ac, const nir_intrinsic_instr *instr) { @@ -2485,8 +2625,7 @@ void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) ac_build_waitcnt(ac, LGKM_CNT & VM_CNT); return; } - ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier", - ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); + ac_build_s_barrier(ac); } static void emit_discard(struct ac_nir_context *ctx, @@ -2500,7 +2639,7 @@ static void emit_discard(struct ac_nir_context *ctx, ctx->ac.i32_0, ""); } else { assert(instr->intrinsic == nir_intrinsic_discard); - cond = LLVMConstInt(ctx->ac.i1, false, 0); + cond = ctx->ac.i1false; } ctx->abi->emit_kill(ctx->abi, cond); @@ -2558,7 +2697,7 @@ visit_first_invocation(struct ac_nir_context *ctx) LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1); /* The second argument is whether cttz(0) should be defined, but we do not care. 
*/ - LLVMValueRef args[] = {active_set, LLVMConstInt(ctx->ac.i1, 0, false)}; + LLVMValueRef args[] = {active_set, ctx->ac.i1false}; LLVMValueRef result = ac_build_intrinsic(&ctx->ac, "llvm.cttz.i64", ctx->ac.i64, args, 2, @@ -2757,11 +2896,8 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx, interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el, ctx->ac.f32, ""); - temp1 = LLVMBuildFMul(ctx->ac.builder, ddx_el, src_c0, ""); - temp1 = LLVMBuildFAdd(ctx->ac.builder, temp1, interp_el, ""); - - temp2 = LLVMBuildFMul(ctx->ac.builder, ddy_el, src_c1, ""); - temp2 = LLVMBuildFAdd(ctx->ac.builder, temp2, temp1, ""); + temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el); + temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1); ij_out[i] = LLVMBuildBitCast(ctx->ac.builder, temp2, ctx->ac.i32, ""); @@ -3232,7 +3368,7 @@ static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, LLVMValueRef coord) { coord = ac_to_float(ctx, coord); - coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0); + coord = ac_build_round(ctx, coord); coord = ac_to_integer(ctx, coord); return coord; } @@ -3544,7 +3680,6 @@ static void visit_post_phi(struct ac_nir_context *ctx, static void phi_post_pass(struct ac_nir_context *ctx) { - struct hash_entry *entry; hash_table_foreach(ctx->phis, entry) { visit_post_phi(ctx, (nir_phi_instr*)entry->key, (LLVMValueRef)entry->data); @@ -3606,6 +3741,9 @@ static void visit_deref(struct ac_nir_context *ctx, result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index)); break; + case nir_deref_type_cast: + result = get_src(ctx, instr->parent); + break; default: unreachable("Unhandled deref_instr deref type"); } @@ -3744,10 +3882,12 @@ ac_handle_shader_output_decl(struct ac_llvm_context *ctx, } } + bool is_16bit = glsl_type_is_16bit(variable->type); + LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32; for (unsigned i = 0; i < attrib_count; ++i) { for (unsigned chan = 0; chan < 4; chan++) { abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] = - ac_build_alloca_undef(ctx, ctx->f32, ""); + ac_build_alloca_undef(ctx, type, ""); } } } @@ -3847,7 +3987,7 @@ setup_shared(struct ac_nir_context *ctx, LLVMAddGlobalInAddressSpace( ctx->ac.module, glsl_to_llvm_type(&ctx->ac, variable->type), variable->name ? 
variable->name : "", - AC_LOCAL_ADDR_SPACE); + AC_ADDR_SPACE_LDS); _mesa_hash_table_insert(ctx->vars, variable, shared); } } @@ -3936,3 +4076,164 @@ ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) nir_lower_indirect_derefs(nir, indirect_mask); } + +static unsigned +get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin) +{ + if (intrin->intrinsic != nir_intrinsic_store_deref) + return 0; + + nir_variable *var = + nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0])); + + if (var->data.mode != nir_var_shader_out) + return 0; + + unsigned writemask = 0; + const int location = var->data.location; + unsigned first_component = var->data.location_frac; + unsigned num_comps = intrin->dest.ssa.num_components; + + if (location == VARYING_SLOT_TESS_LEVEL_INNER) + writemask = ((1 << num_comps + 1) - 1) << first_component; + else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) + writemask = (((1 << num_comps + 1) - 1) << first_component) << 4; + + return writemask; +} + +static void +scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask, + unsigned *cond_block_tf_writemask, + bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf) +{ + switch (cf_node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(cf_node); + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_barrier) { + + /* If we find a barrier in nested control flow put this in the + * too hard basket. In GLSL this is not possible but it is in + * SPIR-V. + */ + if (is_nested_cf) { + *tessfactors_are_def_in_all_invocs = false; + return; + } + + /* The following case must be prevented: + * gl_TessLevelInner = ...; + * barrier(); + * if (gl_InvocationID == 1) + * gl_TessLevelInner = ...; + * + * If you consider disjoint code segments separated by barriers, each + * such segment that writes tess factor channels should write the same + * channels in all codepaths within that segment. + */ + if (upper_block_tf_writemask || cond_block_tf_writemask) { + /* Accumulate the result: */ + *tessfactors_are_def_in_all_invocs &= + !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask)); + + /* Analyze the next code segment from scratch. */ + *upper_block_tf_writemask = 0; + *cond_block_tf_writemask = 0; + } + } else + *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin); + } + + break; + } + case nir_cf_node_if: { + unsigned then_tessfactor_writemask = 0; + unsigned else_tessfactor_writemask = 0; + + nir_if *if_stmt = nir_cf_node_as_if(cf_node); + foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) { + scan_tess_ctrl(nested_node, &then_tessfactor_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) { + scan_tess_ctrl(nested_node, &else_tessfactor_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + if (then_tessfactor_writemask || else_tessfactor_writemask) { + /* If both statements write the same tess factor channels, + * we can say that the upper block writes them too. 
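+	 * For example, if both the then and else branches write
+	 * gl_TessLevelOuter[0], that channel is written on every path through
+	 * the if statement, so it is added to the upper block's writemask.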
+ */ + *upper_block_tf_writemask |= then_tessfactor_writemask & + else_tessfactor_writemask; + *cond_block_tf_writemask |= then_tessfactor_writemask | + else_tessfactor_writemask; + } + + break; + } + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(cf_node); + foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) { + scan_tess_ctrl(nested_node, cond_block_tf_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + break; + } + default: + unreachable("unknown cf node type"); + } +} + +bool +ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir) +{ + assert(nir->info.stage == MESA_SHADER_TESS_CTRL); + + /* The pass works as follows: + * If all codepaths write tess factors, we can say that all + * invocations define tess factors. + * + * Each tess factor channel is tracked separately. + */ + unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */ + unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */ + + /* Initial value = true. Here the pass will accumulate results from + * multiple segments surrounded by barriers. If tess factors aren't + * written at all, it's a shader bug and we don't care if this will be + * true. + */ + bool tessfactors_are_def_in_all_invocs = true; + + nir_foreach_function(function, nir) { + if (function->impl) { + foreach_list_typed(nir_cf_node, node, node, &function->impl->body) { + scan_tess_ctrl(node, &main_block_tf_writemask, + &cond_block_tf_writemask, + &tessfactors_are_def_in_all_invocs, + false); + } + } + } + + /* Accumulate the result for the last code segment separated by a + * barrier. + */ + if (main_block_tf_writemask || cond_block_tf_writemask) { + tessfactors_are_def_in_all_invocs &= + !(cond_block_tf_writemask & ~main_block_tf_writemask); + } + + return tessfactors_are_def_in_all_invocs; +}
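
A minimal standalone sketch of the accumulation rule implemented above, for
illustration only: a barrier-separated segment defines the tess factors in all
invocations iff every channel written under conditional control flow is also
written unconditionally in that segment, i.e. !(cond_mask & ~upper_mask). The
struct and function names below are hypothetical, not Mesa API; the masks use
the pass's encoding (inner levels in bits 0-3, outer levels starting at bit 4).

#include <stdbool.h>
#include <stdio.h>

/* One barrier-separated code segment of a tess ctrl shader. */
struct tf_segment {
	unsigned upper_mask; /* channels written unconditionally */
	unsigned cond_mask;  /* channels written under conditional control flow */
};

static bool tf_def_in_all_invocs(const struct tf_segment *segs, int n)
{
	bool ok = true;
	for (int i = 0; i < n; i++)
		ok &= !(segs[i].cond_mask & ~segs[i].upper_mask);
	return ok;
}

int main(void)
{
	/* Segment 0 writes both inner levels unconditionally (mask 0x3);
	 * segment 1 writes outer level 0 only inside an if (mask 0x10),
	 * so the analysis must answer "false". */
	struct tf_segment segs[] = { { 0x3, 0x0 }, { 0x0, 0x10 } };
	printf("%s\n", tf_def_in_all_invocs(segs, 2) ? "true" : "false");
	return 0;
}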