X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_nir_to_llvm.c;h=58f512ea99724e9edcc8e2090b21f7f0eb97847d;hb=762a6333f21fd8606f69db6060027c4522d46678;hp=ddec74fa1ab0aed0432ac26701357f5fb089fe47;hpb=66463b7f7540ed3d21ca6ae7c729fbb671453188;p=mesa.git diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index ddec74fa1ab..58f512ea997 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -22,6 +22,7 @@ */ #include "ac_nir_to_llvm.h" +#include "ac_llvm_build.h" #include "ac_llvm_util.h" #include "ac_binary.h" #include "sid.h" @@ -43,14 +44,6 @@ enum radeon_llvm_calling_convention { #define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1) #define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) -#define SENDMSG_GS 2 -#define SENDMSG_GS_DONE 3 - -#define SENDMSG_GS_OP_NOP (0 << 4) -#define SENDMSG_GS_OP_CUT (1 << 4) -#define SENDMSG_GS_OP_EMIT (2 << 4) -#define SENDMSG_GS_OP_EMIT_CUT (3 << 4) - enum desc_type { DESC_IMAGE, DESC_FMASK, @@ -106,6 +99,7 @@ struct nir_to_llvm_context { LLVMValueRef linear_sample, linear_center, linear_centroid; LLVMValueRef front_face; LLVMValueRef ancillary; + LLVMValueRef sample_coverage; LLVMValueRef frag_pos[4]; LLVMBasicBlockRef continue_block; @@ -128,6 +122,8 @@ struct nir_to_llvm_context { LLVMTypeRef v16i8; LLVMTypeRef voidt; + LLVMValueRef i1true; + LLVMValueRef i1false; LLVMValueRef i32zero; LLVMValueRef i32one; LLVMValueRef f32zero; @@ -148,8 +144,10 @@ struct nir_to_llvm_context { int num_locals; LLVMValueRef *locals; bool has_ddxy; - unsigned num_clips; - unsigned num_culls; + uint8_t num_input_clips; + uint8_t num_input_culls; + uint8_t num_output_clips; + uint8_t num_output_culls; bool has_ds_bpermute; @@ -158,13 +156,6 @@ struct nir_to_llvm_context { unsigned gs_max_out_vertices; }; -struct ac_tex_info { - LLVMValueRef args[12]; - int arg_count; - LLVMTypeRef dst_type; - bool has_offset; -}; - static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, nir_deref_var *deref, enum desc_type desc_type); @@ -179,9 +170,11 @@ static unsigned shader_io_get_unique_index(gl_varying_slot slot) return 0; if (slot == VARYING_SLOT_PSIZ) return 1; - if (slot == VARYING_SLOT_CLIP_DIST0) + if (slot == VARYING_SLOT_CLIP_DIST0 || + slot == VARYING_SLOT_CULL_DIST0) return 2; - if (slot == VARYING_SLOT_CLIP_DIST1) + if (slot == VARYING_SLOT_CLIP_DIST1 || + slot == VARYING_SLOT_CULL_DIST1) return 3; if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) return 4 + (slot - VARYING_SLOT_VAR0); @@ -264,11 +257,11 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, for (unsigned i = 0; i < sgpr_params; ++i) { if (array_params_mask & (1 << i)) { LLVMValueRef P = LLVMGetParam(main_function, i); - ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL); + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL); ac_add_attr_dereferenceable(P, UINT64_MAX); } else { - ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_INREG); + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); } } @@ -535,7 +528,7 @@ static void create_function(struct nir_to_llvm_context *ctx) arg_types[arg_idx++] = ctx->f32; /* pos w float */ arg_types[arg_idx++] = ctx->i32; /* front face */ arg_types[arg_idx++] = ctx->i32; /* ancillary */ - arg_types[arg_idx++] = ctx->f32; /* sample coverage */ + arg_types[arg_idx++] = ctx->i32; /* sample coverage */ arg_types[arg_idx++] = ctx->i32; /* fixed pt */ break; default: @@ -569,9 +562,9 @@ static void create_function(struct nir_to_llvm_context *ctx) set_userdata_location_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, user_sgpr_idx, 2); user_sgpr_idx += 2; if (ctx->options->supports_spill) { - ctx->ring_offsets = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", - LLVMPointerType(ctx->i8, CONST_ADDR_SPACE), - NULL, 0, AC_FUNC_ATTR_READNONE); + ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", + LLVMPointerType(ctx->i8, CONST_ADDR_SPACE), + NULL, 0, AC_FUNC_ATTR_READNONE); ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets, const_array(ctx->v16i8, 8), ""); } else @@ -662,6 +655,7 @@ static void create_function(struct nir_to_llvm_context *ctx) ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++); ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++); ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++); + ctx->sample_coverage = LLVMGetParam(ctx->main_function, arg_idx++); break; default: unreachable("Shader stage not implemented"); @@ -689,6 +683,8 @@ static void setup_types(struct nir_to_llvm_context *ctx) ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v16i8 = LLVMVectorType(ctx->i8, 16); + ctx->i1false = LLVMConstInt(ctx->i1, 0, false); + ctx->i1true = LLVMConstInt(ctx->i1, 1, false); ctx->i32zero = LLVMConstInt(ctx->i32, 0, false); ctx->i32one = LLVMConstInt(ctx->i32, 1, false); ctx->f32zero = LLVMConstReal(ctx->f32, 0.0); @@ -874,7 +870,7 @@ static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx, }; sprintf(name, "%s.f%d", intrin, get_elem_bits(ctx, result_type)); - return ac_emit_llvm_intrinsic(&ctx->ac, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); + return ac_build_intrinsic(&ctx->ac, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); } static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx, @@ -889,7 +885,7 @@ static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx, }; sprintf(name, "%s.f%d", intrin, get_elem_bits(ctx, result_type)); - return ac_emit_llvm_intrinsic(&ctx->ac, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); + return ac_build_intrinsic(&ctx->ac, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); } static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx, @@ -905,7 +901,7 @@ static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx, }; sprintf(name, "%s.f%d", intrin, get_elem_bits(ctx, result_type)); - return ac_emit_llvm_intrinsic(&ctx->ac, name, result_type, params, 3, AC_FUNC_ATTR_READNONE); + return ac_build_intrinsic(&ctx->ac, name, result_type, params, 3, AC_FUNC_ATTR_READNONE); } static LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx, @@ -931,51 +927,19 @@ static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx, */ LLVMConstInt(ctx->i32, 1, false), }; - return ac_emit_llvm_intrinsic(&ctx->ac, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE); + return ac_build_intrinsic(&ctx->ac, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE); } static LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx, LLVMValueRef src0) { - LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.flbit.i32", - ctx->i32, &src0, 1, - AC_FUNC_ATTR_READNONE); - - /* The HW returns the last bit index from MSB, but NIR wants - * the index from LSB. Invert it by doing "31 - msb". */ - msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), - msb, ""); - - LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true); - LLVMValueRef cond = LLVMBuildOr(ctx->builder, - LLVMBuildICmp(ctx->builder, LLVMIntEQ, - src0, ctx->i32zero, ""), - LLVMBuildICmp(ctx->builder, LLVMIntEQ, - src0, all_ones, ""), ""); - - return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, ""); + return ac_build_imsb(&ctx->ac, src0, ctx->i32); } static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx, LLVMValueRef src0) { - LLVMValueRef args[2] = { - src0, - ctx->i32one, - }; - LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctlz.i32", - ctx->i32, args, ARRAY_SIZE(args), - AC_FUNC_ATTR_READNONE); - - /* The HW returns the last bit index from MSB, but NIR wants - * the index from LSB. Invert it by doing "31 - msb". */ - msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), - msb, ""); - - return LLVMBuildSelect(ctx->builder, - LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0, - ctx->i32zero, ""), - LLVMConstInt(ctx->i32, -1, true), msb, ""); + return ac_build_umsb(&ctx->ac, src0, ctx->i32); } static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx, @@ -1027,9 +991,9 @@ static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx, LLVMValueRef params[] = { fsrc0, }; - LLVMValueRef floor = ac_emit_llvm_intrinsic(&ctx->ac, intr, - ctx->f32, params, 1, - AC_FUNC_ATTR_READNONE); + LLVMValueRef floor = ac_build_intrinsic(&ctx->ac, intr, + ctx->f32, params, 1, + AC_FUNC_ATTR_READNONE); return LLVMBuildFSub(ctx->builder, fsrc0, floor, ""); } @@ -1044,8 +1008,8 @@ static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx, ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true); - res = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ret_type, - params, 2, AC_FUNC_ATTR_READNONE); + res = ac_build_intrinsic(&ctx->ac, intrin, ret_type, + params, 2, AC_FUNC_ATTR_READNONE); res = LLVMBuildExtractValue(ctx->builder, res, 1, ""); res = LLVMBuildZExt(ctx->builder, res, ctx->i32, ""); @@ -1085,13 +1049,13 @@ static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx, } static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx, - const char *intrin, + bool is_signed, LLVMValueRef srcs[3]) { LLVMValueRef result; LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); - result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE); + result = ac_build_bfe(&ctx->ac, srcs[0], srcs[1], srcs[2], is_signed); result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, ""); return result; } @@ -1198,7 +1162,7 @@ static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx, else idx = 2; - result = ac_emit_ddxy(&ctx->ac, ctx->has_ds_bpermute, + result = ac_build_ddxy(&ctx->ac, ctx->has_ds_bpermute, mask, idx, ctx->lds, src0); return result; @@ -1295,7 +1259,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) case nir_op_fmod: src[0] = to_float(ctx, src[0]); src[1] = to_float(ctx, src[1]); - result = ac_emit_fdiv(&ctx->ac, src[0], src[1]); + result = ac_build_fdiv(&ctx->ac, src[0], src[1]); result = emit_intrin_1f_param(ctx, "llvm.floor", to_float_type(ctx, def_type), result); result = LLVMBuildFMul(ctx->builder, src[1] , result, ""); @@ -1323,11 +1287,11 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) case nir_op_fdiv: src[0] = to_float(ctx, src[0]); src[1] = to_float(ctx, src[1]); - result = ac_emit_fdiv(&ctx->ac, src[0], src[1]); + result = ac_build_fdiv(&ctx->ac, src[0], src[1]); break; case nir_op_frcp: src[0] = to_float(ctx, src[0]); - result = ac_emit_fdiv(&ctx->ac, ctx->f32one, src[0]); + result = ac_build_fdiv(&ctx->ac, ctx->f32one, src[0]); break; case nir_op_iand: result = LLVMBuildAnd(ctx->builder, src[0], src[1], ""); @@ -1445,7 +1409,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) case nir_op_frsq: result = emit_intrin_1f_param(ctx, "llvm.sqrt", to_float_type(ctx, def_type), src[0]); - result = ac_emit_fdiv(&ctx->ac, ctx->f32one, result); + result = ac_build_fdiv(&ctx->ac, ctx->f32one, result); break; case nir_op_fpow: result = emit_intrin_2f_param(ctx, "llvm.pow", @@ -1464,19 +1428,19 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) to_float_type(ctx, def_type), src[0], src[1], src[2]); break; case nir_op_ibitfield_extract: - result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src); + result = emit_bitfield_extract(ctx, true, src); break; case nir_op_ubitfield_extract: - result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src); + result = emit_bitfield_extract(ctx, false, src); break; case nir_op_bitfield_insert: result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]); break; case nir_op_bitfield_reverse: - result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); + result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_bit_count: - result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); + result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_vec2: case nir_op_vec3: @@ -1485,30 +1449,44 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) src[i] = to_integer(ctx, src[i]); result = ac_build_gather_values(&ctx->ac, src, num_components); break; - case nir_op_d2i: - case nir_op_f2i: + case nir_op_f2i32: + case nir_op_f2i64: src[0] = to_float(ctx, src[0]); result = LLVMBuildFPToSI(ctx->builder, src[0], def_type, ""); break; - case nir_op_d2u: - case nir_op_f2u: + case nir_op_f2u32: + case nir_op_f2u64: src[0] = to_float(ctx, src[0]); result = LLVMBuildFPToUI(ctx->builder, src[0], def_type, ""); break; - case nir_op_i2d: - case nir_op_i2f: + case nir_op_i2f32: + case nir_op_i2f64: result = LLVMBuildSIToFP(ctx->builder, src[0], to_float_type(ctx, def_type), ""); break; - case nir_op_u2d: - case nir_op_u2f: + case nir_op_u2f32: + case nir_op_u2f64: result = LLVMBuildUIToFP(ctx->builder, src[0], to_float_type(ctx, def_type), ""); break; - case nir_op_f2d: + case nir_op_f2f64: result = LLVMBuildFPExt(ctx->builder, src[0], to_float_type(ctx, def_type), ""); break; - case nir_op_d2f: + case nir_op_f2f32: result = LLVMBuildFPTrunc(ctx->builder, src[0], to_float_type(ctx, def_type), ""); break; + case nir_op_u2u32: + case nir_op_u2u64: + if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type)) + result = LLVMBuildZExt(ctx->builder, src[0], def_type, ""); + else + result = LLVMBuildTrunc(ctx->builder, src[0], def_type, ""); + break; + case nir_op_i2i32: + case nir_op_i2i64: + if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type)) + result = LLVMBuildSExt(ctx->builder, src[0], def_type, ""); + else + result = LLVMBuildTrunc(ctx->builder, src[0], def_type, ""); + break; case nir_op_bcsel: result = emit_bcsel(ctx, src[0], src[1], src[2]); break; @@ -1655,40 +1633,30 @@ static void build_int_type_name( } static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, - struct ac_tex_info *tinfo, - nir_tex_instr *instr, - const char *intr_name, - unsigned coord_vgpr_index) + struct ac_image_args *args, + nir_tex_instr *instr) { - LLVMValueRef coord = tinfo->args[0]; + LLVMValueRef coord = args->addr; LLVMValueRef half_texel[2]; int c; + unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare; //TODO Rect { - LLVMValueRef txq_args[10]; - int txq_arg_count = 0; - LLVMValueRef size; - bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; - txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, false); - txq_args[txq_arg_count++] = tinfo->args[1]; - txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */ - txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */ - txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */ - txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0); - txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */ - txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */ - txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */ - txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */ - size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32, - txq_args, txq_arg_count, - AC_FUNC_ATTR_READNONE); + struct ac_image_args txq_args = { 0 }; + + txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; + txq_args.opcode = ac_image_get_resinfo; + txq_args.dmask = 0xf; + txq_args.addr = ctx->i32zero; + txq_args.resource = args->resource; + LLVMValueRef size = ac_build_image_opcode(&ctx->ac, &txq_args); for (c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, false), ""); half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); - half_texel[c] = ac_emit_fdiv(&ctx->ac, ctx->f32one, half_texel[c]); + half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->f32one, half_texel[c]); half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), ""); } @@ -1704,78 +1672,70 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, ""); } - tinfo->args[0] = coord; - return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); - + args->addr = coord; + return ac_build_image_opcode(&ctx->ac, args); } static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, nir_tex_instr *instr, - struct ac_tex_info *tinfo) -{ - const char *name = "llvm.SI.image.sample"; - const char *infix = ""; - char intr_name[127]; - char type[64]; - bool is_shadow = instr->is_shadow; - bool has_offset = tinfo->has_offset; + struct ac_image_args *args) +{ + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + return ac_build_buffer_load_format(&ctx->ac, + args->resource, + args->addr, + LLVMConstInt(ctx->i32, 0, false), + true); + } + + args->opcode = ac_image_sample; + args->compare = instr->is_shadow; + switch (instr->op) { case nir_texop_txf: case nir_texop_txf_ms: case nir_texop_samples_identical: - name = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? "llvm.SI.image.load" : - instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? "llvm.SI.vs.load.input" : - "llvm.SI.image.load.mip"; - is_shadow = false; - has_offset = false; + args->opcode = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? ac_image_load : ac_image_load_mip; + args->compare = false; + args->offset = false; break; case nir_texop_txb: - infix = ".b"; + args->bias = true; break; case nir_texop_txl: - infix = ".l"; + args->lod = true; break; case nir_texop_txs: - name = "llvm.SI.getresinfo"; - break; case nir_texop_query_levels: - name = "llvm.SI.getresinfo"; + args->opcode = ac_image_get_resinfo; break; case nir_texop_tex: if (ctx->stage != MESA_SHADER_FRAGMENT) - infix = ".lz"; + args->level_zero = true; break; case nir_texop_txd: - infix = ".d"; + args->deriv = true; break; case nir_texop_tg4: - name = "llvm.SI.gather4"; - infix = ".lz"; + args->opcode = ac_image_gather4; + args->level_zero = true; break; case nir_texop_lod: - name = "llvm.SI.getlod"; - is_shadow = false; - has_offset = false; + args->opcode = ac_image_get_lod; + args->compare = false; + args->offset = false; break; default: break; } - build_int_type_name(LLVMTypeOf(tinfo->args[0]), type, sizeof(type)); - sprintf(intr_name, "%s%s%s%s.%s", name, is_shadow ? ".c" : "", infix, - has_offset ? ".o" : "", type); - if (instr->op == nir_texop_tg4) { enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { - return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name, - (int)has_offset + (int)is_shadow); + return radv_lower_gather4_integer(ctx, args, instr); } } - return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); - + return ac_build_image_opcode(&ctx->ac, args); } static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx, @@ -1785,15 +1745,17 @@ static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx, unsigned desc_set = nir_intrinsic_desc_set(instr); unsigned binding = nir_intrinsic_binding(instr); LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; - struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout; + struct radv_pipeline_layout *pipeline_layout = ctx->options->layout; + struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout; unsigned base_offset = layout->binding[binding].offset; LLVMValueRef offset, stride; if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { + unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start + + layout->binding[binding].dynamic_offset_offset; desc_ptr = ctx->push_constants; - base_offset = ctx->options->layout->push_constant_size; - base_offset += 16 * layout->binding[binding].dynamic_offset_offset; + base_offset = pipeline_layout->push_constant_size + 16 * idx; stride = LLVMConstInt(ctx->i32, 16, false); } else stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false); @@ -1847,8 +1809,8 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, params[1] = get_src(ctx, instr->src[1]); params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */ - params[4] = LLVMConstInt(ctx->i1, 0, false); /* glc */ - params[5] = LLVMConstInt(ctx->i1, 0, false); /* slc */ + params[4] = ctx->i1false; /* glc */ + params[5] = ctx->i1false; /* slc */ if (components_32bit > 1) data_type = LLVMVectorType(ctx->f32, components_32bit); @@ -1911,8 +1873,8 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, } params[0] = data; params[3] = offset; - ac_emit_llvm_intrinsic(&ctx->ac, store_name, - ctx->voidt, params, 6, 0); + ac_build_intrinsic(&ctx->ac, store_name, + ctx->voidt, params, 6, 0); } } @@ -1932,7 +1894,7 @@ static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx, params[arg_count++] = get_src(ctx, instr->src[0]); params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */ params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ - params[arg_count++] = LLVMConstInt(ctx->i1, 0, false); /* slc */ + params[arg_count++] = ctx->i1false; /* slc */ switch (instr->intrinsic) { case nir_intrinsic_ssbo_atomic_add: @@ -1969,7 +1931,7 @@ static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx, abort(); } - return ac_emit_llvm_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0); + return ac_build_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0); } static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, @@ -2006,11 +1968,11 @@ static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, get_src(ctx, instr->src[0]), LLVMConstInt(ctx->i32, 0, false), offset, - LLVMConstInt(ctx->i1, 0, false), - LLVMConstInt(ctx->i1, 0, false), + ctx->i1false, + ctx->i1false, }; - results[i] = ac_emit_llvm_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); + results[i] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); } @@ -2051,8 +2013,10 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0), offset, "") }; - results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32, - params, 2, AC_FUNC_ATTR_READNONE); + results[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32, + params, 2, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); } @@ -2128,6 +2092,7 @@ load_gs_input(struct nir_to_llvm_context *ctx, unsigned param, vtx_offset_param; LLVMValueRef value[4], result; unsigned vertex_index; + unsigned cull_offset = 0; radv_get_deref_offset(ctx, &instr->variables[0]->deref, false, &vertex_index, &const_index, &indir_index); @@ -2136,11 +2101,14 @@ load_gs_input(struct nir_to_llvm_context *ctx, vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param], LLVMConstInt(ctx->i32, 4, false), ""); + param = shader_io_get_unique_index(instr->variables[0]->var->data.location); + if (instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0) + cull_offset += ctx->num_input_clips; for (unsigned i = 0; i < instr->num_components; i++) { - param = shader_io_get_unique_index(instr->variables[0]->var->data.location); + args[0] = ctx->esgs_ring; args[1] = vtx_offset; - args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false); + args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index + cull_offset) * 256, false); args[3] = ctx->i32zero; args[4] = ctx->i32one; /* OFFEN */ args[5] = ctx->i32zero; /* IDXEN */ @@ -2148,8 +2116,10 @@ load_gs_input(struct nir_to_llvm_context *ctx, args[7] = ctx->i32zero; /* SLC */ args[8] = ctx->i32zero; /* TFE */ - value[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32", - ctx->i32, args, 9, AC_FUNC_ATTR_READONLY); + value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32", + ctx->i32, args, 9, + AC_FUNC_ATTR_READONLY | + AC_FUNC_ATTR_LEGACY); } result = ac_build_gather_values(&ctx->ac, value, instr->num_components); @@ -2237,6 +2207,9 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); LLVMValueRef derived_ptr; + if (indir_index) + indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), ""); + for (unsigned chan = 0; chan < ve; chan++) { LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false); if (indir_index) @@ -2343,6 +2316,10 @@ visit_store_var(struct nir_to_llvm_context *ctx, break; case nir_var_shared: { LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); + + if (indir_index) + indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), ""); + for (unsigned chan = 0; chan < 8; chan++) { if (!(writemask & (1 << chan))) continue; @@ -2389,6 +2366,85 @@ static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array) return 0; } + + +/* Adjust the sample index according to FMASK. + * + * For uncompressed MSAA surfaces, FMASK should return 0x76543210, + * which is the identity mapping. Each nibble says which physical sample + * should be fetched to get that sample. + * + * For example, 0x11111100 means there are only 2 samples stored and + * the second sample covers 3/4 of the pixel. When reading samples 0 + * and 1, return physical sample 0 (determined by the first two 0s + * in FMASK), otherwise return physical sample 1. + * + * The sample index should be adjusted as follows: + * sample_index = (fmask >> (sample_index * 4)) & 0xF; + */ +static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context *ctx, + LLVMValueRef coord_x, LLVMValueRef coord_y, + LLVMValueRef coord_z, + LLVMValueRef sample_index, + LLVMValueRef fmask_desc_ptr) +{ + LLVMValueRef fmask_load_address[4]; + LLVMValueRef res; + + fmask_load_address[0] = coord_x; + fmask_load_address[1] = coord_y; + if (coord_z) { + fmask_load_address[2] = coord_z; + fmask_load_address[3] = LLVMGetUndef(ctx->i32); + } + + struct ac_image_args args = {0}; + + args.opcode = ac_image_load; + args.da = coord_z ? true : false; + args.resource = fmask_desc_ptr; + args.dmask = 0xf; + args.addr = ac_build_gather_values(&ctx->ac, fmask_load_address, coord_z ? 4 : 2); + + res = ac_build_image_opcode(&ctx->ac, &args); + + res = to_integer(ctx, res); + LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false); + LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false); + + LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder, + res, + ctx->i32zero, ""); + + LLVMValueRef sample_index4 = + LLVMBuildMul(ctx->builder, sample_index, four, ""); + LLVMValueRef shifted_fmask = + LLVMBuildLShr(ctx->builder, fmask, sample_index4, ""); + LLVMValueRef final_sample = + LLVMBuildAnd(ctx->builder, shifted_fmask, F, ""); + + /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK + * resource descriptor is 0 (invalid), + */ + LLVMValueRef fmask_desc = + LLVMBuildBitCast(ctx->builder, fmask_desc_ptr, + ctx->v8i32, ""); + + LLVMValueRef fmask_word1 = + LLVMBuildExtractElement(ctx->builder, fmask_desc, + ctx->i32one, ""); + + LLVMValueRef word1_is_nonzero = + LLVMBuildICmp(ctx->builder, LLVMIntNE, + fmask_word1, ctx->i32zero, ""); + + /* Replace the MSAA sample index. */ + sample_index = + LLVMBuildSelect(ctx->builder, word1_is_nonzero, + final_sample, sample_index, ""); + return sample_index; +} + static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { @@ -2403,6 +2459,8 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false), }; LLVMValueRef res; + LLVMValueRef sample_index = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0); + int count; enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS || @@ -2413,6 +2471,27 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, count = image_type_to_components_count(dim, glsl_sampler_type_is_array(type)); + if (is_ms) { + LLVMValueRef fmask_load_address[3]; + int chan; + + fmask_load_address[0] = LLVMBuildExtractElement(ctx->builder, src0, masks[0], ""); + fmask_load_address[1] = LLVMBuildExtractElement(ctx->builder, src0, masks[1], ""); + if (glsl_sampler_type_is_array(type)) + fmask_load_address[2] = LLVMBuildExtractElement(ctx->builder, src0, masks[2], ""); + else + fmask_load_address[2] = NULL; + if (add_frag_pos) { + for (chan = 0; chan < 2; ++chan) + fmask_load_address[chan] = LLVMBuildAdd(ctx->builder, fmask_load_address[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), ""); + } + sample_index = adjust_sample_index_using_fmask(ctx, + fmask_load_address[0], + fmask_load_address[1], + fmask_load_address[2], + sample_index, + get_sampler_desc(ctx, instr->variables[0], DESC_FMASK)); + } if (count == 1) { if (instr->src[0].ssa->num_components) res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], ""); @@ -2431,7 +2510,7 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), ""); } if (is_ms) { - coords[count] = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0); + coords[count] = sample_index; count++; } @@ -2444,67 +2523,6 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, return res; } -static void build_type_name_for_intr( - LLVMTypeRef type, - char *buf, unsigned bufsize) -{ - LLVMTypeRef elem_type = type; - - assert(bufsize >= 8); - - if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { - int ret = snprintf(buf, bufsize, "v%u", - LLVMGetVectorSize(type)); - if (ret < 0) { - char *type_name = LLVMPrintTypeToString(type); - fprintf(stderr, "Error building type name for: %s\n", - type_name); - return; - } - elem_type = LLVMGetElementType(type); - buf += ret; - bufsize -= ret; - } - switch (LLVMGetTypeKind(elem_type)) { - default: break; - case LLVMIntegerTypeKind: - snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type)); - break; - case LLVMFloatTypeKind: - snprintf(buf, bufsize, "f32"); - break; - case LLVMDoubleTypeKind: - snprintf(buf, bufsize, "f64"); - break; - } -} - -static void get_image_intr_name(const char *base_name, - LLVMTypeRef data_type, - LLVMTypeRef coords_type, - LLVMTypeRef rsrc_type, - char *out_name, unsigned out_len) -{ - char coords_type_name[8]; - - build_type_name_for_intr(coords_type, coords_type_name, - sizeof(coords_type_name)); - - if (HAVE_LLVM <= 0x0309) { - snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name); - } else { - char data_type_name[8]; - char rsrc_type_name[8]; - - build_type_name_for_intr(data_type, data_type_name, - sizeof(data_type_name)); - build_type_name_for_intr(rsrc_type, rsrc_type_name, - sizeof(rsrc_type_name)); - snprintf(out_name, out_len, "%s.%s.%s.%s", base_name, - data_type_name, coords_type_name, rsrc_type_name); - } -} - static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { @@ -2522,44 +2540,44 @@ static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx, params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */ - params[3] = LLVMConstInt(ctx->i1, 0, false); /* glc */ - params[4] = LLVMConstInt(ctx->i1, 0, false); /* slc */ - res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32, - params, 5, 0); + params[3] = ctx->i1false; /* glc */ + params[4] = ctx->i1false; /* slc */ + res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32, + params, 5, 0); res = trim_vector(ctx, res, instr->dest.ssa.num_components); res = to_integer(ctx, res); } else { bool is_da = glsl_sampler_type_is_array(type) || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; - LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero; - LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false); - LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false); + LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false; + LLVMValueRef glc = ctx->i1false; + LLVMValueRef slc = ctx->i1false; params[0] = get_image_coords(ctx, instr); params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */ if (HAVE_LLVM <= 0x0309) { - params[3] = LLVMConstInt(ctx->i1, 0, false); /* r128 */ + params[3] = ctx->i1false; /* r128 */ params[4] = da; params[5] = glc; params[6] = slc; } else { - LLVMValueRef lwe = LLVMConstInt(ctx->i1, 0, false); + LLVMValueRef lwe = ctx->i1false; params[3] = glc; params[4] = slc; params[5] = lwe; params[6] = da; } - get_image_intr_name("llvm.amdgcn.image.load", - ctx->v4f32, /* vdata */ - LLVMTypeOf(params[0]), /* coords */ - LLVMTypeOf(params[1]), /* rsrc */ - intrinsic_name, sizeof(intrinsic_name)); + ac_get_image_intr_name("llvm.amdgcn.image.load", + ctx->v4f32, /* vdata */ + LLVMTypeOf(params[0]), /* coords */ + LLVMTypeOf(params[1]), /* rsrc */ + intrinsic_name, sizeof(intrinsic_name)); - res = ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32, - params, 7, AC_FUNC_ATTR_READONLY); + res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32, + params, 7, AC_FUNC_ATTR_READONLY); } return to_integer(ctx, res); } @@ -2570,8 +2588,6 @@ static void visit_image_store(struct nir_to_llvm_context *ctx, LLVMValueRef params[8]; char intrinsic_name[64]; const nir_variable *var = instr->variables[0]->var; - LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0); - LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0); const struct glsl_type *type = glsl_without_array(var->type); if (ctx->stage == MESA_SHADER_FRAGMENT) @@ -2583,42 +2599,42 @@ static void visit_image_store(struct nir_to_llvm_context *ctx, params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */ - params[4] = i1false; /* glc */ - params[5] = i1false; /* slc */ - ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt, - params, 6, 0); + params[4] = ctx->i1false; /* glc */ + params[5] = ctx->i1false; /* slc */ + ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt, + params, 6, 0); } else { bool is_da = glsl_sampler_type_is_array(type) || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; - LLVMValueRef da = is_da ? i1true : i1false; - LLVMValueRef glc = i1false; - LLVMValueRef slc = i1false; + LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false; + LLVMValueRef glc = ctx->i1false; + LLVMValueRef slc = ctx->i1false; params[0] = to_float(ctx, get_src(ctx, instr->src[2])); params[1] = get_image_coords(ctx, instr); /* coords */ params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */ if (HAVE_LLVM <= 0x0309) { - params[4] = i1false; /* r128 */ + params[4] = ctx->i1false; /* r128 */ params[5] = da; params[6] = glc; params[7] = slc; } else { - LLVMValueRef lwe = i1false; + LLVMValueRef lwe = ctx->i1false; params[4] = glc; params[5] = slc; params[6] = lwe; params[7] = da; } - get_image_intr_name("llvm.amdgcn.image.store", - LLVMTypeOf(params[0]), /* vdata */ - LLVMTypeOf(params[1]), /* coords */ - LLVMTypeOf(params[2]), /* rsrc */ - intrinsic_name, sizeof(intrinsic_name)); + ac_get_image_intr_name("llvm.amdgcn.image.store", + LLVMTypeOf(params[0]), /* vdata */ + LLVMTypeOf(params[1]), /* coords */ + LLVMTypeOf(params[2]), /* rsrc */ + intrinsic_name, sizeof(intrinsic_name)); - ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt, - params, 8, 0); + ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt, + params, 8, 0); } } @@ -2629,8 +2645,7 @@ static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx, LLVMValueRef params[6]; int param_count = 0; const nir_variable *var = instr->variables[0]->var; - LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0); - LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0); + const char *base_name = "llvm.amdgcn.image.atomic"; const char *atomic_name; LLVMValueRef coords; @@ -2649,17 +2664,17 @@ static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx, coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ params[param_count++] = ctx->i32zero; /* voffset */ - params[param_count++] = i1false; /* glc */ - params[param_count++] = i1false; /* slc */ + params[param_count++] = ctx->i1false; /* glc */ + params[param_count++] = ctx->i1false; /* slc */ } else { bool da = glsl_sampler_type_is_array(type) || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; coords = params[param_count++] = get_image_coords(ctx, instr); params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); - params[param_count++] = i1false; /* r128 */ - params[param_count++] = da ? i1true : i1false; /* da */ - params[param_count++] = i1false; /* slc */ + params[param_count++] = ctx->i1false; /* r128 */ + params[param_count++] = da ? ctx->i1true : ctx->i1false; /* da */ + params[param_count++] = ctx->i1false; /* slc */ } switch (instr->intrinsic) { @@ -2695,14 +2710,13 @@ static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx, snprintf(intrinsic_name, sizeof(intrinsic_name), "%s.%s.%s", base_name, atomic_name, coords_type); - return ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0); + return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0); } static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef res; - LLVMValueRef params[10]; const nir_variable *var = instr->variables[0]->var; const struct glsl_type *type = instr->variables[0]->var->type; bool da = glsl_sampler_type_is_array(var->type) || @@ -2712,19 +2726,16 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx, if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true); - params[0] = ctx->i32zero; - params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); - params[2] = LLVMConstInt(ctx->i32, 15, false); - params[3] = ctx->i32zero; - params[4] = ctx->i32zero; - params[5] = da ? ctx->i32one : ctx->i32zero; - params[6] = ctx->i32zero; - params[7] = ctx->i32zero; - params[8] = ctx->i32zero; - params[9] = ctx->i32zero; - - res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32, - params, 10, AC_FUNC_ATTR_READNONE); + + struct ac_image_args args = { 0 }; + + args.da = da; + args.dmask = 0xf; + args.resource = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); + args.opcode = ac_image_get_resinfo; + args.addr = ctx->i32zero; + + res = ac_build_image_opcode(&ctx->ac, &args); if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && glsl_sampler_type_is_array(type)) { @@ -2742,15 +2753,15 @@ static void emit_waitcnt(struct nir_to_llvm_context *ctx) LLVMValueRef args[1] = { LLVMConstInt(ctx->i32, 0xf70, false), }; - ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt", - ctx->voidt, args, 1, 0); + ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt", + ctx->voidt, args, 1, 0); } static void emit_barrier(struct nir_to_llvm_context *ctx) { // TODO tess - ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier", - ctx->voidt, NULL, 0, 0); + ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier", + ctx->voidt, NULL, 0, 0); } static void emit_discard_if(struct nir_to_llvm_context *ctx, @@ -2766,9 +2777,7 @@ static void emit_discard_if(struct nir_to_llvm_context *ctx, cond = LLVMBuildSelect(ctx->builder, cond, LLVMConstReal(ctx->f32, -1.0f), ctx->f32zero, ""); - ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill", - ctx->voidt, - &cond, 1, 0); + ac_build_kill(&ctx->ac, cond); } static LLVMValueRef @@ -2901,7 +2910,6 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, unsigned location; unsigned chan; LLVMValueRef src_c0, src_c1; - const char *intr_name; LLVMValueRef src0; int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0; switch (instr->intrinsic) { @@ -2909,10 +2917,12 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, location = INTERP_CENTROID; break; case nir_intrinsic_interp_var_at_sample: - case nir_intrinsic_interp_var_at_offset: location = INTERP_SAMPLE; src0 = get_src(ctx, instr->src[0]); break; + case nir_intrinsic_interp_var_at_offset: + location = INTERP_CENTER; + src0 = get_src(ctx, instr->src[0]); default: break; } @@ -2935,7 +2945,7 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location); attr_number = LLVMConstInt(ctx->i32, input_index, false); - if (location == INTERP_SAMPLE) { + if (location == INTERP_SAMPLE || location == INTERP_CENTER) { LLVMValueRef ij_out[2]; LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param); @@ -2973,18 +2983,27 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); } - intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant"; + for (chan = 0; chan < 2; chan++) { - LLVMValueRef args[4]; LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false); - args[0] = llvm_chan; - args[1] = attr_number; - args[2] = ctx->prim_mask; - args[3] = interp_param; - result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name, - ctx->f32, args, args[3] ? 4 : 3, - AC_FUNC_ATTR_READNONE); + if (interp_param) { + interp_param = LLVMBuildBitCast(ctx->builder, + interp_param, LLVMVectorType(ctx->f32, 2), ""); + LLVMValueRef i = LLVMBuildExtractElement( + ctx->builder, interp_param, ctx->i32zero, ""); + LLVMValueRef j = LLVMBuildExtractElement( + ctx->builder, interp_param, ctx->i32one, ""); + + result[chan] = ac_build_fs_interp(&ctx->ac, + llvm_chan, attr_number, + ctx->prim_mask, i, j); + } else { + result[chan] = ac_build_fs_interp_mov(&ctx->ac, + LLVMConstInt(ctx->i32, 2, false), + llvm_chan, attr_number, + ctx->prim_mask); + } } return ac_build_gather_values(&ctx->ac, result, 2); } @@ -2995,9 +3014,8 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, { LLVMValueRef gs_next_vertex; LLVMValueRef can_emit, kill; - LLVMValueRef args[2]; int idx; - + int clip_cull_slot = -1; assert(instr->const_index[0] == 0); /* Write vertex attribute values to GSVS ring */ gs_next_vertex = LLVMBuildLoad(ctx->builder, @@ -3015,56 +3033,71 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, kill = LLVMBuildSelect(ctx->builder, can_emit, LLVMConstReal(ctx->f32, 1.0f), LLVMConstReal(ctx->f32, -1.0f), ""); - ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill", - ctx->voidt, &kill, 1, 0); + ac_build_kill(&ctx->ac, kill); /* loop num outputs */ idx = 0; for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { LLVMValueRef *out_ptr = &ctx->outputs[i * 4]; + int length = 4; + int start = 0; + int slot = idx; + int slot_inc = 1; + if (!(ctx->output_mask & (1ull << i))) continue; - for (unsigned j = 0; j < 4; j++) { + if (i == VARYING_SLOT_CLIP_DIST1 || + i == VARYING_SLOT_CULL_DIST1) + continue; + + if (i == VARYING_SLOT_CLIP_DIST0 || + i == VARYING_SLOT_CULL_DIST0) { + /* pack clip and cull into a single set of slots */ + if (clip_cull_slot == -1) { + clip_cull_slot = idx; + if (ctx->num_output_clips + ctx->num_output_culls > 4) + slot_inc = 2; + } else { + slot = clip_cull_slot; + slot_inc = 0; + } + if (i == VARYING_SLOT_CLIP_DIST0) + length = ctx->num_output_clips; + if (i == VARYING_SLOT_CULL_DIST0) { + start = ctx->num_output_clips; + length = ctx->num_output_culls; + } + } + for (unsigned j = 0; j < length; j++) { LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], ""); - LLVMValueRef voffset = LLVMConstInt(ctx->i32, (idx * 4 + j) * ctx->gs_max_out_vertices, false); + LLVMValueRef voffset = LLVMConstInt(ctx->i32, (slot * 4 + j + start) * ctx->gs_max_out_vertices, false); voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, ""); voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->i32, 4, false), ""); out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, ""); - ac_build_tbuffer_store(&ctx->ac, ctx->gsvs_ring, - out_val, 1, - voffset, ctx->gs2vs_offset, 0, - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_NUM_FORMAT_UINT, - 1, 0, 1, 1, 0); + ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring, + out_val, 1, + voffset, ctx->gs2vs_offset, 0, + 1, 1, true, true); } - idx++; + idx += slot_inc; } gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex, ctx->i32one, ""); LLVMBuildStore(ctx->builder, gs_next_vertex, ctx->gs_next_vertex); - args[0] = LLVMConstInt(ctx->i32, SENDMSG_GS_OP_EMIT | SENDMSG_GS | (0 << 8), false); - args[1] = ctx->gs_wave_id; - ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.sendmsg", - ctx->voidt, args, 2, 0); + + ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id); } static void visit_end_primitive(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { - LLVMValueRef args[2]; - - assert(instr->const_index[0] == 0); - args[0] = LLVMConstInt(ctx->i32, SENDMSG_GS_OP_CUT | SENDMSG_GS | (0 << 8), false); - args[1] = ctx->gs_wave_id; - - ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.sendmsg", ctx->voidt, - args, 2, 0); + ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id); } static void visit_intrinsic(struct nir_to_llvm_context *ctx, @@ -3112,6 +3145,9 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, ctx->shader_info->fs.force_persample = true; result = load_sample_pos(ctx); break; + case nir_intrinsic_load_sample_mask_in: + result = ctx->sample_coverage; + break; case nir_intrinsic_load_front_face: result = ctx->front_face; break; @@ -3183,9 +3219,9 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, break; case nir_intrinsic_discard: ctx->shader_info->fs.can_discard = true; - ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp", - ctx->voidt, - NULL, 0, 0); + ac_build_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp", + ctx->voidt, + NULL, 0, AC_FUNC_ATTR_LEGACY); break; case nir_intrinsic_discard_if: emit_discard_if(ctx, instr); @@ -3244,6 +3280,7 @@ static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, LLVMBuilderRef builder = ctx->builder; LLVMTypeRef type; LLVMValueRef index = NULL; + unsigned constant_index = 0; assert(deref->var->data.binding < layout->binding_count); @@ -3280,6 +3317,21 @@ static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, if (child->deref_array_type == nir_deref_array_type_indirect) { index = get_src(ctx, child->indirect); } + + constant_index = child->base_offset; + } + if (desc_type == DESC_SAMPLER && binding->immutable_samplers && + (!index || binding->immutable_samplers_equal)) { + if (binding->immutable_samplers_equal) + constant_index = 0; + + LLVMValueRef constants[] = { + LLVMConstInt(ctx->i32, binding->immutable_samplers[constant_index * 4 + 0], 0), + LLVMConstInt(ctx->i32, binding->immutable_samplers[constant_index * 4 + 1], 0), + LLVMConstInt(ctx->i32, binding->immutable_samplers[constant_index * 4 + 2], 0), + LLVMConstInt(ctx->i32, binding->immutable_samplers[constant_index * 4 + 3], 0), + }; + return ac_build_gather_values(&ctx->ac, constants, 4); } assert(stride % type_size == 0); @@ -3296,14 +3348,13 @@ static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, } static void set_tex_fetch_args(struct nir_to_llvm_context *ctx, - struct ac_tex_info *tinfo, + struct ac_image_args *args, nir_tex_instr *instr, nir_texop op, LLVMValueRef res_ptr, LLVMValueRef samp_ptr, LLVMValueRef *param, unsigned count, unsigned dmask) { - int num_args; unsigned is_rect = 0; bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; @@ -3314,42 +3365,21 @@ static void set_tex_fetch_args(struct nir_to_llvm_context *ctx, param[count++] = LLVMGetUndef(ctx->i32); if (count > 1) - tinfo->args[0] = ac_build_gather_values(&ctx->ac, param, count); + args->addr = ac_build_gather_values(&ctx->ac, param, count); else - tinfo->args[0] = param[0]; - - tinfo->args[1] = res_ptr; - num_args = 2; + args->addr = param[0]; - if (op == nir_texop_txf || - op == nir_texop_txf_ms || - op == nir_texop_query_levels || - op == nir_texop_texture_samples || - op == nir_texop_txs) - tinfo->dst_type = ctx->v4i32; - else { - tinfo->dst_type = ctx->v4f32; - tinfo->args[num_args++] = samp_ptr; - } + args->resource = res_ptr; + args->sampler = samp_ptr; if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) { - tinfo->args[0] = res_ptr; - tinfo->args[1] = LLVMConstInt(ctx->i32, 0, false); - tinfo->args[2] = param[0]; - tinfo->arg_count = 3; + args->addr = param[0]; return; } - tinfo->args[num_args++] = LLVMConstInt(ctx->i32, dmask, 0); - tinfo->args[num_args++] = LLVMConstInt(ctx->i32, is_rect, 0); /* unorm */ - tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */ - tinfo->args[num_args++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0); - tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */ - tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */ - tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */ - tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */ - - tinfo->arg_count = num_args; + args->dmask = dmask; + args->unorm = is_rect; + args->da = da; } /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. @@ -3407,7 +3437,7 @@ static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx, LLVMValueRef coord) { coord = to_float(ctx, coord); - coord = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0); + coord = ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0); coord = to_integer(ctx, coord); return coord; } @@ -3415,7 +3445,7 @@ static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx, static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) { LLVMValueRef result = NULL; - struct ac_tex_info tinfo = { 0 }; + struct ac_image_args args = { 0 }; unsigned dmask = 0xf; LLVMValueRef address[16]; LLVMValueRef coords[5]; @@ -3470,7 +3500,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) } if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - result = get_buffer_size(ctx, res_ptr, false); + result = get_buffer_size(ctx, res_ptr, true); goto write_result; } @@ -3507,7 +3537,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) for (chan = 0; chan < 3; ++chan) offset[chan] = ctx->i32zero; - tinfo.has_offset = true; + args.offset = true; for (chan = 0; chan < get_llvm_num_components(offsets); chan++) { offset[chan] = llvm_extract_elem(ctx, offsets, chan); offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan], @@ -3608,7 +3638,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) if (instr->op == nir_texop_samples_identical) { LLVMValueRef txf_address[4]; - struct ac_tex_info txf_info = { 0 }; + struct ac_image_args txf_args = { 0 }; unsigned txf_count = count; memcpy(txf_address, address, sizeof(txf_address)); @@ -3616,81 +3646,26 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) txf_address[2] = ctx->i32zero; txf_address[3] = ctx->i32zero; - set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf, + set_tex_fetch_args(ctx, &txf_args, instr, nir_texop_txf, fmask_ptr, NULL, txf_address, txf_count, 0xf); - result = build_tex_intrinsic(ctx, instr, &txf_info); + result = build_tex_intrinsic(ctx, instr, &txf_args); result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, ""); result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero); goto write_result; } - /* Adjust the sample index according to FMASK. - * - * For uncompressed MSAA surfaces, FMASK should return 0x76543210, - * which is the identity mapping. Each nibble says which physical sample - * should be fetched to get that sample. - * - * For example, 0x11111100 means there are only 2 samples stored and - * the second sample covers 3/4 of the pixel. When reading samples 0 - * and 1, return physical sample 0 (determined by the first two 0s - * in FMASK), otherwise return physical sample 1. - * - * The sample index should be adjusted as follows: - * sample_index = (fmask >> (sample_index * 4)) & 0xF; - */ - if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS) { - LLVMValueRef txf_address[4]; - struct ac_tex_info txf_info = { 0 }; - unsigned txf_count = count; - memcpy(txf_address, address, sizeof(txf_address)); - - if (!instr->is_array) - txf_address[2] = ctx->i32zero; - txf_address[3] = ctx->i32zero; - - set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf, - fmask_ptr, NULL, - txf_address, txf_count, 0xf); - - result = build_tex_intrinsic(ctx, instr, &txf_info); - LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false); - LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false); - - LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder, - result, - ctx->i32zero, ""); - + if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS && + instr->op != nir_texop_txs) { unsigned sample_chan = instr->is_array ? 3 : 2; - - LLVMValueRef sample_index4 = - LLVMBuildMul(ctx->builder, address[sample_chan], four, ""); - LLVMValueRef shifted_fmask = - LLVMBuildLShr(ctx->builder, fmask, sample_index4, ""); - LLVMValueRef final_sample = - LLVMBuildAnd(ctx->builder, shifted_fmask, F, ""); - - /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK - * resource descriptor is 0 (invalid), - */ - LLVMValueRef fmask_desc = - LLVMBuildBitCast(ctx->builder, fmask_ptr, - ctx->v8i32, ""); - - LLVMValueRef fmask_word1 = - LLVMBuildExtractElement(ctx->builder, fmask_desc, - ctx->i32one, ""); - - LLVMValueRef word1_is_nonzero = - LLVMBuildICmp(ctx->builder, LLVMIntNE, - fmask_word1, ctx->i32zero, ""); - - /* Replace the MSAA sample index. */ - address[sample_chan] = - LLVMBuildSelect(ctx->builder, word1_is_nonzero, - final_sample, address[sample_chan], ""); + address[sample_chan] = adjust_sample_index_using_fmask(ctx, + address[0], + address[1], + instr->is_array ? address[2] : NULL, + address[sample_chan], + fmask_ptr); } if (offsets && instr->op == nir_texop_txf) { @@ -3717,10 +3692,10 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) else dmask = 1 << instr->component; } - set_tex_fetch_args(ctx, &tinfo, instr, instr->op, + set_tex_fetch_args(ctx, &args, instr, instr->op, res_ptr, samp_ptr, address, count, dmask); - result = build_tex_intrinsic(ctx, instr, &tinfo); + result = build_tex_intrinsic(ctx, instr, &args); if (instr->op == nir_texop_query_levels) result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), ""); @@ -3962,9 +3937,10 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx, args[0] = t_list; args[1] = LLVMConstInt(ctx->i32, 0, false); args[2] = buffer_index; - input = ac_emit_llvm_intrinsic(&ctx->ac, + input = ac_build_intrinsic(&ctx->ac, "llvm.SI.vs.load.input", ctx->v4f32, args, 3, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND | + AC_FUNC_ATTR_LEGACY); for (unsigned chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false); @@ -3975,6 +3951,21 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx, } } +static void +handle_gs_input_decl(struct nir_to_llvm_context *ctx, + struct nir_variable *variable) +{ + int idx = variable->data.location; + + if (idx == VARYING_SLOT_CLIP_DIST0 || + idx == VARYING_SLOT_CULL_DIST0) { + int length = glsl_get_length(glsl_get_array_element(variable->type)); + if (idx == VARYING_SLOT_CLIP_DIST0) + ctx->num_input_clips = length; + else + ctx->num_input_culls = length; + } +} static void interp_fs_input(struct nir_to_llvm_context *ctx, unsigned attr, @@ -3982,9 +3973,10 @@ static void interp_fs_input(struct nir_to_llvm_context *ctx, LLVMValueRef prim_mask, LLVMValueRef result[4]) { - const char *intr_name; LLVMValueRef attr_number; unsigned chan; + LLVMValueRef i, j; + bool interp = interp_param != NULL; attr_number = LLVMConstInt(ctx->i32, attr, false); @@ -3998,19 +3990,31 @@ static void interp_fs_input(struct nir_to_llvm_context *ctx, * fs.interp cannot be used on integers, because they can be equal * to NaN. */ - intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant"; + if (interp) { + interp_param = LLVMBuildBitCast(ctx->builder, interp_param, + LLVMVectorType(ctx->f32, 2), ""); + + i = LLVMBuildExtractElement(ctx->builder, interp_param, + ctx->i32zero, ""); + j = LLVMBuildExtractElement(ctx->builder, interp_param, + ctx->i32one, ""); + } for (chan = 0; chan < 4; chan++) { - LLVMValueRef args[4]; LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false); - args[0] = llvm_chan; - args[1] = attr_number; - args[2] = prim_mask; - args[3] = interp_param; - result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name, - ctx->f32, args, args[3] ? 4 : 3, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); + if (interp) { + result[chan] = ac_build_fs_interp(&ctx->ac, + llvm_chan, + attr_number, + prim_mask, i, j); + } else { + result[chan] = ac_build_fs_interp_mov(&ctx->ac, + LLVMConstInt(ctx->i32, 2, false), + llvm_chan, + attr_number, + prim_mask); + } } } @@ -4055,6 +4059,9 @@ handle_shader_input_decl(struct nir_to_llvm_context *ctx, case MESA_SHADER_FRAGMENT: handle_fs_input_decl(ctx, variable); break; + case MESA_SHADER_GEOMETRY: + handle_gs_input_decl(ctx, variable); + break; default: break; } @@ -4086,7 +4093,7 @@ handle_fs_inputs_pre(struct nir_to_llvm_context *ctx, for(int i = 0; i < 3; ++i) inputs[i] = ctx->frag_pos[i]; - inputs[3] = ac_emit_fdiv(&ctx->ac, ctx->f32one, ctx->frag_pos[3]); + inputs[3] = ac_build_fdiv(&ctx->ac, ctx->f32one, ctx->frag_pos[3]); } } ctx->shader_info->fs.num_interp = index; @@ -4149,14 +4156,14 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx, if (idx == VARYING_SLOT_CLIP_DIST0 || idx == VARYING_SLOT_CULL_DIST0) { int length = glsl_get_length(variable->type); - if (ctx->stage == MESA_SHADER_VERTEX) { - if (idx == VARYING_SLOT_CLIP_DIST0) { + if (idx == VARYING_SLOT_CLIP_DIST0) { + if (ctx->stage == MESA_SHADER_VERTEX) ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1; - ctx->num_clips = length; - } else if (idx == VARYING_SLOT_CULL_DIST0) { + ctx->num_output_clips = length; + } else if (idx == VARYING_SLOT_CULL_DIST0) { + if (ctx->stage == MESA_SHADER_VERTEX) ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1; - ctx->num_culls = length; - } + ctx->num_output_culls = length; } if (length > 4) attrib_count = 2; @@ -4223,23 +4230,25 @@ static void si_llvm_init_export_args(struct nir_to_llvm_context *ctx, LLVMValueRef *values, unsigned target, - LLVMValueRef *args) + struct ac_export_args *args) { /* Default is 0xf. Adjusted below depending on the format. */ - args[0] = LLVMConstInt(ctx->i32, target != V_008DFC_SQ_EXP_NULL ? 0xf : 0, false); + args->enabled_channels = 0xf; + /* Specify whether the EXEC mask represents the valid mask */ - args[1] = LLVMConstInt(ctx->i32, 0, false); + args->valid_mask = 0; /* Specify whether this is the last export */ - args[2] = LLVMConstInt(ctx->i32, 0, false); + args->done = 0; + /* Specify the target we are exporting */ - args[3] = LLVMConstInt(ctx->i32, target, false); + args->target = target; - args[4] = LLVMConstInt(ctx->i32, 0, false); /* COMPR flag */ - args[5] = LLVMGetUndef(ctx->f32); - args[6] = LLVMGetUndef(ctx->f32); - args[7] = LLVMGetUndef(ctx->f32); - args[8] = LLVMGetUndef(ctx->f32); + args->compr = false; + args->out[0] = LLVMGetUndef(ctx->f32); + args->out[1] = LLVMGetUndef(ctx->f32); + args->out[2] = LLVMGetUndef(ctx->f32); + args->out[3] = LLVMGetUndef(ctx->f32); if (!values) return; @@ -4252,29 +4261,29 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, switch(col_format) { case V_028714_SPI_SHADER_ZERO: - args[0] = LLVMConstInt(ctx->i32, 0x0, 0); - args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_NULL, 0); + args->enabled_channels = 0; /* writemask */ + args->target = V_008DFC_SQ_EXP_NULL; break; case V_028714_SPI_SHADER_32_R: - args[0] = LLVMConstInt(ctx->i32, 0x1, 0); - args[5] = values[0]; + args->enabled_channels = 1; + args->out[0] = values[0]; break; case V_028714_SPI_SHADER_32_GR: - args[0] = LLVMConstInt(ctx->i32, 0x3, 0); - args[5] = values[0]; - args[6] = values[1]; + args->enabled_channels = 0x3; + args->out[0] = values[0]; + args->out[1] = values[1]; break; case V_028714_SPI_SHADER_32_AR: - args[0] = LLVMConstInt(ctx->i32, 0x9, 0); - args[5] = values[0]; - args[8] = values[3]; + args->enabled_channels = 0x9; + args->out[0] = values[0]; + args->out[3] = values[3]; break; case V_028714_SPI_SHADER_FP16_ABGR: - args[4] = ctx->i32one; + args->compr = 1; for (unsigned chan = 0; chan < 2; chan++) { LLVMValueRef pack_args[2] = { @@ -4283,16 +4292,14 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, }; LLVMValueRef packed; - packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16", - ctx->i32, pack_args, 2, - AC_FUNC_ATTR_READNONE); - args[chan + 5] = packed; + packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args); + args->out[chan] = packed; } break; case V_028714_SPI_SHADER_UNORM16_ABGR: for (unsigned chan = 0; chan < 4; chan++) { - val[chan] = emit_float_saturate(ctx, values[chan], 0, 1); + val[chan] = ac_build_clamp(&ctx->ac, values[chan]); val[chan] = LLVMBuildFMul(ctx->builder, val[chan], LLVMConstReal(ctx->f32, 65535), ""); val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], @@ -4301,9 +4308,9 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, ctx->i32, ""); } - args[4] = ctx->i32one; - args[5] = emit_pack_int16(ctx, val[0], val[1]); - args[6] = emit_pack_int16(ctx, val[2], val[3]); + args->compr = 1; + args->out[0] = emit_pack_int16(ctx, val[0], val[1]); + args->out[1] = emit_pack_int16(ctx, val[2], val[3]); break; case V_028714_SPI_SHADER_SNORM16_ABGR: @@ -4322,9 +4329,9 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, ""); } - args[4] = ctx->i32one; - args[5] = emit_pack_int16(ctx, val[0], val[1]); - args[6] = emit_pack_int16(ctx, val[2], val[3]); + args->compr = 1; + args->out[0] = emit_pack_int16(ctx, val[0], val[1]); + args->out[1] = emit_pack_int16(ctx, val[2], val[3]); break; case V_028714_SPI_SHADER_UINT16_ABGR: { @@ -4335,9 +4342,9 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max); } - args[4] = ctx->i32one; - args[5] = emit_pack_int16(ctx, val[0], val[1]); - args[6] = emit_pack_int16(ctx, val[2], val[3]); + args->compr = 1; + args->out[0] = emit_pack_int16(ctx, val[0], val[1]); + args->out[1] = emit_pack_int16(ctx, val[2], val[3]); break; } @@ -4352,22 +4359,22 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min); } - args[4] = ctx->i32one; - args[5] = emit_pack_int16(ctx, val[0], val[1]); - args[6] = emit_pack_int16(ctx, val[2], val[3]); + args->compr = 1; + args->out[0] = emit_pack_int16(ctx, val[0], val[1]); + args->out[1] = emit_pack_int16(ctx, val[2], val[3]); break; } default: case V_028714_SPI_SHADER_32_ABGR: - memcpy(&args[5], values, sizeof(values[0]) * 4); + memcpy(&args->out[0], values, sizeof(values[0]) * 4); break; } } else - memcpy(&args[5], values, sizeof(values[0]) * 4); + memcpy(&args->out[0], values, sizeof(values[0]) * 4); - for (unsigned i = 5; i < 9; ++i) - args[i] = to_float(ctx, args[i]); + for (unsigned i = 0; i < 4; ++i) + args->out[i] = to_float(ctx, args->out[i]); } static void @@ -4376,8 +4383,7 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) uint32_t param_count = 0; unsigned target; unsigned pos_idx, num_pos_exports = 0; - LLVMValueRef args[9]; - LLVMValueRef pos_args[4][9] = { { 0 } }; + struct ac_export_args args, pos_args[4] = {}; LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL; int i; const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) | @@ -4392,31 +4398,31 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) unsigned j; if (ctx->shader_info->vs.cull_dist_mask) - ctx->shader_info->vs.cull_dist_mask <<= ctx->num_clips; + ctx->shader_info->vs.cull_dist_mask <<= ctx->num_output_clips; i = VARYING_SLOT_CLIP_DIST0; - for (j = 0; j < ctx->num_clips; j++) + for (j = 0; j < ctx->num_output_clips; j++) slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder, ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); i = VARYING_SLOT_CULL_DIST0; - for (j = 0; j < ctx->num_culls; j++) - slots[ctx->num_clips + j] = to_float(ctx, LLVMBuildLoad(ctx->builder, + for (j = 0; j < ctx->num_output_culls; j++) + slots[ctx->num_output_clips + j] = to_float(ctx, LLVMBuildLoad(ctx->builder, ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); - for (i = ctx->num_clips + ctx->num_culls; i < 8; i++) + for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++) slots[i] = LLVMGetUndef(ctx->f32); - if (ctx->num_clips + ctx->num_culls > 4) { + if (ctx->num_output_clips + ctx->num_output_culls > 4) { target = V_008DFC_SQ_EXP_POS + 3; - si_llvm_init_export_args(ctx, &slots[4], target, args); - memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], - args, sizeof(args)); + si_llvm_init_export_args(ctx, &slots[4], target, &args); + memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], + &args, sizeof(args)); } target = V_008DFC_SQ_EXP_POS + 2; - si_llvm_init_export_args(ctx, &slots[0], target, args); - memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], - args, sizeof(args)); + si_llvm_init_export_args(ctx, &slots[0], target, &args); + memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], + &args, sizeof(args)); } @@ -4460,72 +4466,66 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) param_count++; } - si_llvm_init_export_args(ctx, values, target, args); + si_llvm_init_export_args(ctx, values, target, &args); if (target >= V_008DFC_SQ_EXP_POS && target <= (V_008DFC_SQ_EXP_POS + 3)) { - memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], - args, sizeof(args)); + memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], + &args, sizeof(args)); } else { - ac_emit_llvm_intrinsic(&ctx->ac, - "llvm.SI.export", - ctx->voidt, - args, 9, 0); + ac_build_export(&ctx->ac, &args); } } /* We need to add the position output manually if it's missing. */ - if (!pos_args[0][0]) { - pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false); - pos_args[0][1] = ctx->i32zero; /* EXEC mask */ - pos_args[0][2] = ctx->i32zero; /* last export? */ - pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false); - pos_args[0][4] = ctx->i32zero; /* COMPR flag */ - pos_args[0][5] = ctx->f32zero; /* X */ - pos_args[0][6] = ctx->f32zero; /* Y */ - pos_args[0][7] = ctx->f32zero; /* Z */ - pos_args[0][8] = ctx->f32one; /* W */ + if (!pos_args[0].out[0]) { + pos_args[0].enabled_channels = 0xf; + pos_args[0].valid_mask = 0; + pos_args[0].done = 0; + pos_args[0].target = V_008DFC_SQ_EXP_POS; + pos_args[0].compr = 0; + pos_args[0].out[0] = ctx->f32zero; /* X */ + pos_args[0].out[1] = ctx->f32zero; /* Y */ + pos_args[0].out[2] = ctx->f32zero; /* Z */ + pos_args[0].out[3] = ctx->f32one; /* W */ } uint32_t mask = ((ctx->shader_info->vs.writes_pointsize == true ? 1 : 0) | (ctx->shader_info->vs.writes_layer == true ? 4 : 0) | (ctx->shader_info->vs.writes_viewport_index == true ? 8 : 0)); if (mask) { - pos_args[1][0] = LLVMConstInt(ctx->i32, mask, false); /* writemask */ - pos_args[1][1] = ctx->i32zero; /* EXEC mask */ - pos_args[1][2] = ctx->i32zero; /* last export? */ - pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 1, false); - pos_args[1][4] = ctx->i32zero; /* COMPR flag */ - pos_args[1][5] = ctx->f32zero; /* X */ - pos_args[1][6] = ctx->f32zero; /* Y */ - pos_args[1][7] = ctx->f32zero; /* Z */ - pos_args[1][8] = ctx->f32zero; /* W */ + pos_args[1].enabled_channels = mask; + pos_args[1].valid_mask = 0; + pos_args[1].done = 0; + pos_args[1].target = V_008DFC_SQ_EXP_POS + 1; + pos_args[1].compr = 0; + pos_args[1].out[0] = ctx->f32zero; /* X */ + pos_args[1].out[1] = ctx->f32zero; /* Y */ + pos_args[1].out[2] = ctx->f32zero; /* Z */ + pos_args[1].out[3] = ctx->f32zero; /* W */ if (ctx->shader_info->vs.writes_pointsize == true) - pos_args[1][5] = psize_value; + pos_args[1].out[0] = psize_value; if (ctx->shader_info->vs.writes_layer == true) - pos_args[1][7] = layer_value; + pos_args[1].out[2] = layer_value; if (ctx->shader_info->vs.writes_viewport_index == true) - pos_args[1][8] = viewport_index_value; + pos_args[1].out[3] = viewport_index_value; } for (i = 0; i < 4; i++) { - if (pos_args[i][0]) + if (pos_args[i].out[0]) num_pos_exports++; } pos_idx = 0; for (i = 0; i < 4; i++) { - if (!pos_args[i][0]) + if (!pos_args[i].out[0]) continue; /* Specify the target we are exporting */ - pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false); + pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++; if (pos_idx == num_pos_exports) - pos_args[i][2] = ctx->i32one; - ac_emit_llvm_intrinsic(&ctx->ac, - "llvm.SI.export", - ctx->voidt, - pos_args[i], 9, 0); + pos_args[i].done = 1; + ac_build_export(&ctx->ac, &pos_args[i]); } ctx->shader_info->vs.pos_exports = num_pos_exports; @@ -4540,26 +4540,32 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx) for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { LLVMValueRef *out_ptr = &ctx->outputs[i * 4]; int param_index; + int length = 4; + int start = 0; if (!(ctx->output_mask & (1ull << i))) continue; + if (i == VARYING_SLOT_CLIP_DIST0) { + length = ctx->num_output_clips; + } else if (i == VARYING_SLOT_CULL_DIST0) { + start = ctx->num_output_clips; + length = ctx->num_output_culls; + } param_index = shader_io_get_unique_index(i); if (param_index > max_output_written) max_output_written = param_index; - for (j = 0; j < 4; j++) { + for (j = 0; j < length; j++) { LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], ""); out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, ""); - ac_build_tbuffer_store(&ctx->ac, + ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, 1, - LLVMGetUndef(ctx->i32), ctx->es2gs_offset, - (4 * param_index + j) * 4, - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_NUM_FORMAT_UINT, - 0, 0, 1, 1, 0); + NULL, ctx->es2gs_offset, + (4 * param_index + j + start) * 4, + 1, 1, true, true); } } ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16; @@ -4569,19 +4575,20 @@ static void si_export_mrt_color(struct nir_to_llvm_context *ctx, LLVMValueRef *color, unsigned param, bool is_last) { - LLVMValueRef args[9]; + + struct ac_export_args args; + /* Export */ si_llvm_init_export_args(ctx, color, param, - args); + &args); if (is_last) { - args[1] = ctx->i32one; /* whether the EXEC mask is valid */ - args[2] = ctx->i32one; /* DONE bit */ - } else if (args[0] == ctx->i32zero) + args.valid_mask = 1; /* whether the EXEC mask is valid */ + args.done = 1; /* DONE bit */ + } else if (!args.enabled_channels) return; /* unnecessary NULL export */ - ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", - ctx->voidt, args, 9, 0); + ac_build_export(&ctx->ac, &args); } static void @@ -4589,43 +4596,41 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx, LLVMValueRef depth, LLVMValueRef stencil, LLVMValueRef samplemask) { - LLVMValueRef args[9]; - unsigned mask = 0; - args[1] = ctx->i32one; /* whether the EXEC mask is valid */ - args[2] = ctx->i32one; /* DONE bit */ - /* Specify the target we are exporting */ - args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_MRTZ, false); + struct ac_export_args args; - args[4] = ctx->i32zero; /* COMP flag */ - args[5] = LLVMGetUndef(ctx->f32); /* R, depth */ - args[6] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */ - args[7] = LLVMGetUndef(ctx->f32); /* B, sample mask */ - args[8] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */ + args.enabled_channels = 0; + args.valid_mask = 1; + args.done = 1; + args.target = V_008DFC_SQ_EXP_MRTZ; + args.compr = false; + + args.out[0] = LLVMGetUndef(ctx->f32); /* R, depth */ + args.out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */ + args.out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */ + args.out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */ if (depth) { - args[5] = depth; - mask |= 0x1; + args.out[0] = depth; + args.enabled_channels |= 0x1; } if (stencil) { - args[6] = stencil; - mask |= 0x2; + args.out[1] = stencil; + args.enabled_channels |= 0x2; } if (samplemask) { - args[7] = samplemask; - mask |= 0x04; + args.out[2] = samplemask; + args.enabled_channels |= 0x4; } /* SI (except OLAND) has a bug that it only looks * at the X writemask component. */ if (ctx->options->chip_class == SI && ctx->options->family != CHIP_OLAND) - mask |= 0x01; + args.enabled_channels |= 0x1; - args[0] = LLVMConstInt(ctx->i32, mask, false); - ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", - ctx->voidt, args, 9, 0); + ac_build_export(&ctx->ac, &args); } static void @@ -4648,13 +4653,17 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx) ctx->shader_info->fs.writes_stencil = true; stencil = to_float(ctx, LLVMBuildLoad(ctx->builder, ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); + } else if (i == FRAG_RESULT_SAMPLE_MASK) { + ctx->shader_info->fs.writes_sample_mask = true; + samplemask = to_float(ctx, LLVMBuildLoad(ctx->builder, + ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); } else { bool last = false; for (unsigned j = 0; j < 4; j++) values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder, ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); - if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil) + if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask) last = ctx->output_mask <= ((1ull << (i + 1)) - 1); si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last); @@ -4662,7 +4671,7 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx) } } - if (depth || stencil) + if (depth || stencil || samplemask) si_export_mrt_z(ctx, depth, stencil, samplemask); else if (!index) si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true); @@ -4673,12 +4682,7 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx) static void emit_gs_epilogue(struct nir_to_llvm_context *ctx) { - LLVMValueRef args[2]; - - args[0] = LLVMConstInt(ctx->i32, SENDMSG_GS_OP_NOP | SENDMSG_GS_DONE, false); - args[1] = ctx->gs_wave_id; - ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.sendmsg", - ctx->voidt, args, 2, 0); + ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id); } static void @@ -4785,6 +4789,13 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, memset(shader_info, 0, sizeof(*shader_info)); LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--"); + + LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); + char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout); + LLVMSetDataLayout(ctx.module, data_layout_str); + LLVMDisposeTargetData(data_layout); + LLVMDisposeMessage(data_layout_str); + setup_types(&ctx); ctx.builder = LLVMCreateBuilderInContext(ctx.context); @@ -4812,7 +4823,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, idx++; } - shared_size *= 4; + shared_size *= 16; var = LLVMAddGlobalInAddressSpace(ctx.module, LLVMArrayType(ctx.i8, shared_size), "compute_lds", @@ -5040,25 +5051,54 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) args[8] = ctx->i32zero; /* TFE */ int idx = 0; + int clip_cull_slot = -1; for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { + int length = 4; + int start = 0; + int slot = idx; + int slot_inc = 1; if (!(ctx->output_mask & (1ull << i))) continue; - for (unsigned j = 0; j < 4; j++) { + if (i == VARYING_SLOT_CLIP_DIST1 || + i == VARYING_SLOT_CULL_DIST1) + continue; + + if (i == VARYING_SLOT_CLIP_DIST0 || + i == VARYING_SLOT_CULL_DIST0) { + /* unpack clip and cull from a single set of slots */ + if (clip_cull_slot == -1) { + clip_cull_slot = idx; + if (ctx->num_output_clips + ctx->num_output_culls > 4) + slot_inc = 2; + } else { + slot = clip_cull_slot; + slot_inc = 0; + } + if (i == VARYING_SLOT_CLIP_DIST0) + length = ctx->num_output_clips; + if (i == VARYING_SLOT_CULL_DIST0) { + start = ctx->num_output_clips; + length = ctx->num_output_culls; + } + } + + for (unsigned j = 0; j < length; j++) { LLVMValueRef value; args[2] = LLVMConstInt(ctx->i32, - (idx * 4 + j) * + (slot * 4 + j + start) * ctx->gs_max_out_vertices * 16 * 4, false); - value = ac_emit_llvm_intrinsic(&ctx->ac, - "llvm.SI.buffer.load.dword.i32.i32", - ctx->i32, args, 9, - AC_FUNC_ATTR_READONLY); + value = ac_build_intrinsic(&ctx->ac, + "llvm.SI.buffer.load.dword.i32.i32", + ctx->i32, args, 9, + AC_FUNC_ATTR_READONLY | + AC_FUNC_ATTR_LEGACY); LLVMBuildStore(ctx->builder, to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]); } - idx++; + idx += slot_inc; } handle_vs_outputs_post(ctx); }