X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_nir_to_llvm.c;h=550c12d249c7a885b3fe1bdc9b143d58e8acf2a1;hb=bb750d265c5c27136cf10460fc423503a4d7d5aa;hp=7425f0af20c1a0dd4ceaaf6c3a06956f3fe74d56;hpb=49b0a140a731069e0e4959c65bfd1b597a4fb141;p=mesa.git diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 7425f0af20c..550c12d249c 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -43,9 +43,6 @@ enum radeon_llvm_calling_convention { RADEON_LLVM_AMDGPU_HS = 93, }; -#define CONST_ADDR_SPACE 2 -#define LOCAL_ADDR_SPACE 3 - #define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1) #define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) @@ -93,9 +90,6 @@ struct nir_to_llvm_context { LLVMValueRef ring_offsets; LLVMValueRef push_constants; LLVMValueRef view_index; - LLVMValueRef num_work_groups; - LLVMValueRef workgroup_ids[3]; - LLVMValueRef local_invocation_ids; LLVMValueRef tg_size; LLVMValueRef vertex_buffers; @@ -126,7 +120,6 @@ struct nir_to_llvm_context { LLVMValueRef hs_ring_tess_offchip; LLVMValueRef hs_ring_tess_factor; - LLVMValueRef prim_mask; LLVMValueRef sample_pos_offset; LLVMValueRef persp_sample, persp_center, persp_centroid; LLVMValueRef linear_sample, linear_center, linear_centroid; @@ -159,28 +152,6 @@ nir_to_llvm_context_from_abi(struct ac_shader_abi *abi) return container_of(abi, ctx, abi); } -static LLVMTypeRef -nir2llvmtype(struct ac_nir_context *ctx, - const struct glsl_type *type) -{ - switch (glsl_get_base_type(glsl_without_array(type))) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - return ctx->ac.i32; - case GLSL_TYPE_UINT64: - case GLSL_TYPE_INT64: - return ctx->ac.i64; - case GLSL_TYPE_DOUBLE: - return ctx->ac.f64; - case GLSL_TYPE_FLOAT: - return ctx->ac.f32; - default: - assert(!"Unsupported type in nir2llvmtype()"); - break; - } - return 0; -} - static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, const nir_deref_var *deref, enum ac_descriptor_type desc_type, @@ -323,15 +294,13 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS); for (unsigned i = 0; i < args->sgpr_count; ++i) { + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); + if (args->array_params_mask & (1 << i)) { LLVMValueRef P = LLVMGetParam(main_function, i); - ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL); ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_NOALIAS); ac_add_attr_dereferenceable(P, UINT64_MAX); } - else { - ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); - } } if (max_workgroup_size) { @@ -360,12 +329,6 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, return main_function; } -static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) -{ - return LLVMPointerType(LLVMArrayType(elem_type, num_elements), - CONST_ADDR_SPACE); -} - static int get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type) { if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) @@ -652,7 +615,7 @@ declare_global_input_sgprs(struct nir_to_llvm_context *ctx, struct arg_info *args, LLVMValueRef *desc_sets) { - LLVMTypeRef type = const_array(ctx->ac.i8, 1024 * 1024); + LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8); unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0; unsigned stage_mask = 1 << stage; @@ -669,7 +632,7 @@ declare_global_input_sgprs(struct nir_to_llvm_context *ctx, } } } else { - add_array_arg(args, const_array(type, 32), desc_sets); + add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets); } if (ctx->shader_info->info.loads_push_constants) { @@ -689,7 +652,7 @@ declare_vs_specific_input_sgprs(struct nir_to_llvm_context *ctx, (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { if (ctx->shader_info->info.vs.has_vertex_buffers) { - add_arg(args, ARG_SGPR, const_array(ctx->ac.v4i32, 16), + add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), &ctx->vertex_buffers); } add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex); @@ -804,7 +767,7 @@ static void create_function(struct nir_to_llvm_context *ctx, allocate_user_sgprs(ctx, stage, needs_view_index, &user_sgpr_info); if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) { - add_arg(&args, ARG_SGPR, const_array(ctx->ac.v4i32, 16), + add_arg(&args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), &ctx->ring_offsets); } @@ -816,21 +779,21 @@ static void create_function(struct nir_to_llvm_context *ctx, if (ctx->shader_info->info.cs.uses_grid_size) { add_arg(&args, ARG_SGPR, ctx->ac.v3i32, - &ctx->num_work_groups); + &ctx->abi.num_work_groups); } for (int i = 0; i < 3; i++) { - ctx->workgroup_ids[i] = NULL; + ctx->abi.workgroup_ids[i] = NULL; if (ctx->shader_info->info.cs.uses_block_id[i]) { add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->workgroup_ids[i]); + &ctx->abi.workgroup_ids[i]); } } if (ctx->shader_info->info.cs.uses_local_invocation_idx) add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tg_size); add_arg(&args, ARG_VGPR, ctx->ac.v3i32, - &ctx->local_invocation_ids); + &ctx->abi.local_invocation_ids); break; case MESA_SHADER_VERTEX: declare_global_input_sgprs(ctx, stage, has_previous_stage, @@ -1038,7 +1001,7 @@ static void create_function(struct nir_to_llvm_context *ctx, add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->sample_pos_offset); - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->prim_mask); + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.prim_mask); add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample); add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center); add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_centroid); @@ -1084,10 +1047,10 @@ static void create_function(struct nir_to_llvm_context *ctx, &user_sgpr_idx, 2); if (ctx->options->supports_spill) { ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", - LLVMPointerType(ctx->ac.i8, CONST_ADDR_SPACE), + LLVMPointerType(ctx->ac.i8, AC_CONST_ADDR_SPACE), NULL, 0, AC_FUNC_ATTR_READNONE); ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets, - const_array(ctx->ac.v4i32, 16), ""); + ac_array_in_const_addr_space(ctx->ac.v4i32), ""); } } @@ -1417,15 +1380,24 @@ static LLVMValueRef emit_isign(struct ac_llvm_context *ctx, } static LLVMValueRef emit_ffract(struct ac_llvm_context *ctx, - LLVMValueRef src0) + LLVMValueRef src0, unsigned bitsize) { - const char *intr = "llvm.floor.f32"; + LLVMTypeRef type; + char *intr; + + if (bitsize == 32) { + intr = "llvm.floor.f32"; + type = ctx->f32; + } else { + intr = "llvm.floor.f64"; + type = ctx->f64; + } + LLVMValueRef fsrc0 = ac_to_float(ctx, src0); LLVMValueRef params[] = { fsrc0, }; - LLVMValueRef floor = ac_build_intrinsic(ctx, intr, - ctx->f32, params, 1, + LLVMValueRef floor = ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE); return LLVMBuildFSub(ctx->builder, fsrc0, floor, ""); } @@ -1769,11 +1741,6 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) src[1] = ac_to_float(&ctx->ac, src[1]); result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], ""); break; - case nir_op_fdiv: - src[0] = ac_to_float(&ctx->ac, src[0]); - src[1] = ac_to_float(&ctx->ac, src[1]); - result = ac_build_fdiv(&ctx->ac, src[0], src[1]); - break; case nir_op_frcp: src[0] = ac_to_float(&ctx->ac, src[0]); result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1, @@ -1879,7 +1846,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) ac_to_float_type(&ctx->ac, def_type),src[0]); break; case nir_op_ffract: - result = emit_ffract(&ctx->ac, src[0]); + result = emit_ffract(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; case nir_op_fsin: result = emit_intrin_1f_param(&ctx->ac, "llvm.sin", @@ -2323,7 +2290,7 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, args->addr, ctx->ac.i32_0, util_last_bit(mask), - true); + false, true); } args->opcode = ac_image_sample; @@ -3072,7 +3039,6 @@ load_gs_input(struct ac_shader_abi *abi, { struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); LLVMValueRef vtx_offset; - LLVMValueRef args[9]; unsigned param, vtx_offset_param; LLVMValueRef value[4], result; @@ -3090,24 +3056,23 @@ load_gs_input(struct ac_shader_abi *abi, LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), ""); value[i] = ac_lds_load(&ctx->ac, dw_addr); } else { - args[0] = ctx->esgs_ring; - args[1] = vtx_offset; - args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i + const_index) * 256, false); - args[3] = ctx->ac.i32_0; - args[4] = ctx->ac.i32_1; /* OFFEN */ - args[5] = ctx->ac.i32_0; /* IDXEN */ - args[6] = ctx->ac.i32_1; /* GLC */ - args[7] = ctx->ac.i32_0; /* SLC */ - args[8] = ctx->ac.i32_0; /* TFE */ - - value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32", - ctx->ac.i32, args, 9, - AC_FUNC_ATTR_READONLY | - AC_FUNC_ATTR_LEGACY); + LLVMValueRef soffset = + LLVMConstInt(ctx->ac.i32, + (param * 4 + i + const_index) * 256, + false); + + value[i] = ac_build_buffer_load(&ctx->ac, + ctx->esgs_ring, 1, + ctx->ac.i32_0, + vtx_offset, soffset, + 0, 1, 0, true, false); + + value[i] = LLVMBuildBitCast(ctx->builder, value[i], + type, ""); } } result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); - + result = ac_to_integer(&ctx->ac, result); return result; } @@ -3202,16 +3167,17 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, } if (ctx->stage == MESA_SHADER_GEOMETRY) { - LLVMValueRef indir_index; - unsigned const_index, vertex_index; - get_deref_offset(ctx, instr->variables[0], - false, &vertex_index, NULL, - &const_index, &indir_index); + LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); + LLVMValueRef indir_index; + unsigned const_index, vertex_index; + get_deref_offset(ctx, instr->variables[0], + false, &vertex_index, NULL, + &const_index, &indir_index); + return ctx->abi->load_inputs(ctx->abi, instr->variables[0]->var->data.location, instr->variables[0]->var->data.driver_location, instr->variables[0]->var->data.location_frac, ve, - vertex_index, const_index, - nir2llvmtype(ctx, instr->variables[0]->var->type)); + vertex_index, const_index, type); } for (unsigned chan = comp; chan < ve + comp; chan++) { @@ -3632,24 +3598,31 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, type = instr->variables[0]->deref.child->type; type = glsl_without_array(type); - if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { - params[0] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, false); - params[1] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), - ctx->ac.i32_0, ""); /* vindex */ - params[2] = ctx->ac.i32_0; /* voffset */ - params[3] = ctx->ac.i1false; /* glc */ - params[4] = ctx->ac.i1false; /* slc */ - res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->ac.v4f32, - params, 5, 0); + + const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); + if (dim == GLSL_SAMPLER_DIM_BUF) { + unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); + unsigned num_channels = util_last_bit(mask); + LLVMValueRef rsrc, vindex; + + rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, false); + vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), + ctx->ac.i32_0, ""); + + /* TODO: set "glc" and "can_speculate" when OpenGL needs it. */ + res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, + ctx->ac.i32_0, num_channels, + false, false); + res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); res = trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); res = ac_to_integer(&ctx->ac, res); } else { bool is_da = glsl_sampler_type_is_array(type) || - glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE || - glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D || - glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS || - glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS; + dim == GLSL_SAMPLER_DIM_CUBE || + dim == GLSL_SAMPLER_DIM_3D || + dim == GLSL_SAMPLER_DIM_SUBPASS || + dim == GLSL_SAMPLER_DIM_SUBPASS_MS; LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false; LLVMValueRef glc = ctx->ac.i1false; LLVMValueRef slc = ctx->ac.i1false; @@ -3657,18 +3630,10 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, params[0] = get_image_coords(ctx, instr); params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); params[2] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ - if (HAVE_LLVM <= 0x0309) { - params[3] = ctx->ac.i1false; /* r128 */ - params[4] = da; - params[5] = glc; - params[6] = slc; - } else { - LLVMValueRef lwe = ctx->ac.i1false; - params[3] = glc; - params[4] = slc; - params[5] = lwe; - params[6] = da; - } + params[3] = glc; + params[4] = slc; + params[5] = ctx->ac.i1false; + params[6] = da; ac_get_image_intr_name("llvm.amdgcn.image.load", ctx->ac.v4f32, /* vdata */ @@ -3689,12 +3654,13 @@ static void visit_image_store(struct ac_nir_context *ctx, char intrinsic_name[64]; const nir_variable *var = instr->variables[0]->var; const struct glsl_type *type = glsl_without_array(var->type); + const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); LLVMValueRef glc = ctx->ac.i1false; bool force_glc = ctx->ac.chip_class == SI; if (force_glc) glc = ctx->ac.i1true; - if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { + if (dim == GLSL_SAMPLER_DIM_BUF) { params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */ params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true); params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), @@ -3706,8 +3672,8 @@ static void visit_image_store(struct ac_nir_context *ctx, params, 6, 0); } else { bool is_da = glsl_sampler_type_is_array(type) || - glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE || - glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D; + dim == GLSL_SAMPLER_DIM_CUBE || + dim == GLSL_SAMPLER_DIM_3D; LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false; LLVMValueRef slc = ctx->ac.i1false; @@ -3715,18 +3681,10 @@ static void visit_image_store(struct ac_nir_context *ctx, params[1] = get_image_coords(ctx, instr); /* coords */ params[2] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, true); params[3] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ - if (HAVE_LLVM <= 0x0309) { - params[4] = ctx->ac.i1false; /* r128 */ - params[5] = da; - params[6] = glc; - params[7] = slc; - } else { - LLVMValueRef lwe = ctx->ac.i1false; - params[4] = glc; - params[5] = slc; - params[6] = lwe; - params[7] = da; - } + params[4] = glc; + params[5] = slc; + params[6] = ctx->ac.i1false; + params[7] = da; ac_get_image_intr_name("llvm.amdgcn.image.store", LLVMTypeOf(params[0]), /* vdata */ @@ -4005,13 +3963,11 @@ static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx, return result; } -#define INTERP_CENTER 0 -#define INTERP_CENTROID 1 -#define INTERP_SAMPLE 2 - -static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx, +static LLVMValueRef lookup_interp_param(struct ac_shader_abi *abi, enum glsl_interp_mode interp, unsigned location) { + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); + switch (interp) { case INTERP_MODE_FLAT: default: @@ -4037,14 +3993,16 @@ static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx, return NULL; } -static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx, +static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id) { + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); + LLVMValueRef result; LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false)); ptr = LLVMBuildBitCast(ctx->builder, ptr, - const_array(ctx->ac.v2f32, 64), ""); + ac_array_in_const_addr_space(ctx->ac.v2f32), ""); sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, ""); result = ac_build_load_invariant(&ctx->ac, ptr, sample_id); @@ -4056,8 +4014,8 @@ static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx) { LLVMValueRef values[2]; - values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]); - values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]); + values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0], 32); + values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1], 32); return ac_build_gather_values(&ctx->ac, values, 2); } @@ -4085,7 +4043,7 @@ static LLVMValueRef load_sample_mask_in(struct ac_nir_context *ctx) return result; } -static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, +static LLVMValueRef visit_interp(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { LLVMValueRef result[4]; @@ -4103,33 +4061,33 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: location = INTERP_CENTER; - src0 = get_src(ctx->nir, instr->src[0]); + src0 = get_src(ctx, instr->src[0]); break; default: break; } if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) { - src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_0, "")); - src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_1, "")); + src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, src0, ctx->ac.i32_0, "")); + src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, src0, ctx->ac.i32_1, "")); } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) { LLVMValueRef sample_position; LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f); /* fetch sample ID */ - sample_position = load_sample_position(ctx, src0); + sample_position = ctx->abi->load_sample_position(ctx->abi, src0); - src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_0, ""); - src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, ""); - src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_1, ""); - src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, ""); + src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_position, ctx->ac.i32_0, ""); + src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, ""); + src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_position, ctx->ac.i32_1, ""); + src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, ""); } - interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location); + interp_param = ctx->abi->lookup_interp_param(ctx->abi, instr->variables[0]->var->data.interpolation, location); attr_number = LLVMConstInt(ctx->ac.i32, input_index, false); if (location == INTERP_CENTER) { LLVMValueRef ij_out[2]; - LLVMValueRef ddxy_out = emit_ddxy_interp(ctx->nir, interp_param); + LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param); /* * take the I then J parameters, and the DDX/Y for it, and @@ -4142,24 +4100,24 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, for (unsigned i = 0; i < 2; i++) { LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false); LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false); - LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder, + LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, ix_ll, ""); - LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder, + LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, iy_ll, ""); - LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder, + LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ix_ll, ""); LLVMValueRef temp1, temp2; - interp_el = LLVMBuildBitCast(ctx->builder, interp_el, + interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el, ctx->ac.f32, ""); - temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, ""); - temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, ""); + temp1 = LLVMBuildFMul(ctx->ac.builder, ddx_el, src_c0, ""); + temp1 = LLVMBuildFAdd(ctx->ac.builder, temp1, interp_el, ""); - temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, ""); - temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, ""); + temp2 = LLVMBuildFMul(ctx->ac.builder, ddy_el, src_c1, ""); + temp2 = LLVMBuildFAdd(ctx->ac.builder, temp2, temp1, ""); - ij_out[i] = LLVMBuildBitCast(ctx->builder, + ij_out[i] = LLVMBuildBitCast(ctx->ac.builder, temp2, ctx->ac.i32, ""); } interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); @@ -4170,21 +4128,21 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); if (interp_param) { - interp_param = LLVMBuildBitCast(ctx->builder, + interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2f32, ""); LLVMValueRef i = LLVMBuildExtractElement( - ctx->builder, interp_param, ctx->ac.i32_0, ""); + ctx->ac.builder, interp_param, ctx->ac.i32_0, ""); LLVMValueRef j = LLVMBuildExtractElement( - ctx->builder, interp_param, ctx->ac.i32_1, ""); + ctx->ac.builder, interp_param, ctx->ac.i32_1, ""); result[chan] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number, - ctx->prim_mask, i, j); + ctx->abi->prim_mask, i, j); } else { result[chan] = ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, 2, false), llvm_chan, attr_number, - ctx->prim_mask); + ctx->abi->prim_mask); } } return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components, @@ -4338,8 +4296,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx, LLVMValueRef values[3]; for (int i = 0; i < 3; i++) { - values[i] = ctx->nctx->workgroup_ids[i] ? - ctx->nctx->workgroup_ids[i] : ctx->ac.i32_0; + values[i] = ctx->abi->workgroup_ids[i] ? + ctx->abi->workgroup_ids[i] : ctx->ac.i32_0; } result = ac_build_gather_values(&ctx->ac, values, 3); @@ -4349,12 +4307,15 @@ static void visit_intrinsic(struct ac_nir_context *ctx, result = ctx->abi->base_vertex; break; } + case nir_intrinsic_load_local_group_size: + result = ctx->abi->load_local_group_size(ctx->abi); + break; case nir_intrinsic_load_vertex_id_zero_base: { result = ctx->abi->vertex_id; break; } case nir_intrinsic_load_local_invocation_id: { - result = ctx->nctx->local_invocation_ids; + result = ctx->abi->local_invocation_ids; break; } case nir_intrinsic_load_base_instance: @@ -4414,7 +4375,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx, result = ctx->abi->instance_id; break; case nir_intrinsic_load_num_work_groups: - result = ctx->nctx->num_work_groups; + result = ctx->abi->num_work_groups; break; case nir_intrinsic_load_local_invocation_index: result = visit_load_local_invocation_index(ctx->nctx); @@ -4477,6 +4438,9 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_image_size: result = visit_image_size(ctx, instr); break; + case nir_intrinsic_shader_clock: + result = ac_build_shader_clock(&ctx->ac); + break; case nir_intrinsic_discard: case nir_intrinsic_discard_if: emit_discard(ctx, instr); @@ -4507,7 +4471,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_interp_var_at_centroid: case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: - result = visit_interp(ctx->nctx, instr); + result = visit_interp(ctx, instr); break; case nir_intrinsic_emit_vertex: ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->outputs); @@ -4561,8 +4525,14 @@ static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr, bool write) { struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); + LLVMValueRef result; - return LLVMBuildLoad(ctx->builder, buffer_ptr, ""); + LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); + + result = LLVMBuildLoad(ctx->builder, buffer_ptr, ""); + LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); + + return result; } static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr) @@ -4649,7 +4619,7 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0)); - list = LLVMBuildPointerCast(builder, list, const_array(type, 0), ""); + list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), ""); return ac_build_load_to_sgpr(&ctx->ac, list, index); } @@ -5347,6 +5317,9 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx, int index = variable->data.location - VERT_ATTRIB_GENERIC0; int idx = variable->data.location; unsigned attrib_count = glsl_count_attribute_slots(variable->type, true); + uint8_t input_usage_mask = + ctx->shader_info->info.vs.input_usage_mask[variable->data.location]; + unsigned num_channels = util_last_bit(input_usage_mask); variable->data.driver_location = idx * 4; @@ -5371,7 +5344,9 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx, input = ac_build_buffer_load_format(&ctx->ac, t_list, buffer_index, ctx->ac.i32_0, - 4, true); + num_channels, false, true); + + input = ac_build_expand_to_vec4(&ctx->ac, input, num_channels); for (unsigned chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); @@ -5454,7 +5429,7 @@ handle_fs_input_decl(struct nir_to_llvm_context *ctx, else interp_type = INTERP_CENTER; - interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type); + interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type); } else interp = NULL; @@ -5491,7 +5466,7 @@ prepare_interp_optimize(struct nir_to_llvm_context *ctx, } if (uses_center && uses_centroid) { - LLVMValueRef sel = LLVMBuildICmp(ctx->builder, LLVMIntSLT, ctx->prim_mask, ctx->ac.i32_0, ""); + LLVMValueRef sel = LLVMBuildICmp(ctx->builder, LLVMIntSLT, ctx->abi.prim_mask, ctx->ac.i32_0, ""); ctx->persp_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->persp_center, ctx->persp_centroid, ""); ctx->linear_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->linear_center, ctx->linear_centroid, ""); } @@ -5522,7 +5497,7 @@ handle_fs_inputs(struct nir_to_llvm_context *ctx, if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC || i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) { interp_param = *inputs; - interp_fs_input(ctx, index, interp_param, ctx->prim_mask, + interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, inputs); if (!interp_param) @@ -5758,32 +5733,11 @@ setup_shared(struct ac_nir_context *ctx, LLVMAddGlobalInAddressSpace( ctx->ac.module, glsl_to_llvm_type(ctx->nctx, variable->type), variable->name ? variable->name : "", - LOCAL_ADDR_SPACE); + AC_LOCAL_ADDR_SPACE); _mesa_hash_table_insert(ctx->vars, variable, shared); } } -static LLVMValueRef -emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi) -{ - v = ac_to_float(ctx, v); - v = emit_intrin_2f_param(ctx, "llvm.maxnum", ctx->f32, v, LLVMConstReal(ctx->f32, lo)); - return emit_intrin_2f_param(ctx, "llvm.minnum", ctx->f32, v, LLVMConstReal(ctx->f32, hi)); -} - - -static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx, - LLVMValueRef src0, LLVMValueRef src1) -{ - LLVMValueRef const16 = LLVMConstInt(ctx->ac.i32, 16, false); - LLVMValueRef comp[2]; - - comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx->ac.i32, 65535, 0), ""); - comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx->ac.i32, 65535, 0), ""); - comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, ""); - return LLVMBuildOr(ctx->builder, comp[0], comp[1], ""); -} - /* Initialize arguments for the shader export intrinsic */ static void si_llvm_init_export_args(struct nir_to_llvm_context *ctx, @@ -5809,15 +5763,16 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, args->out[2] = LLVMGetUndef(ctx->ac.f32); args->out[3] = LLVMGetUndef(ctx->ac.f32); - if (!values) - return; - if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) { - LLVMValueRef val[4]; unsigned index = target - V_008DFC_SQ_EXP_MRT; unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf; bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1; bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1; + unsigned chan; + + LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef args[2]) = NULL; + LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2], + unsigned bits, bool hi) = NULL; switch(col_format) { case V_028714_SPI_SHADER_ZERO: @@ -5843,106 +5798,91 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, break; case V_028714_SPI_SHADER_FP16_ABGR: - args->compr = 1; - - for (unsigned chan = 0; chan < 2; chan++) { - LLVMValueRef pack_args[2] = { - values[2 * chan], - values[2 * chan + 1] - }; - LLVMValueRef packed; - - packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args); - args->out[chan] = packed; - } + packf = ac_build_cvt_pkrtz_f16; break; case V_028714_SPI_SHADER_UNORM16_ABGR: - for (unsigned chan = 0; chan < 4; chan++) { - val[chan] = ac_build_clamp(&ctx->ac, values[chan]); - val[chan] = LLVMBuildFMul(ctx->builder, val[chan], - LLVMConstReal(ctx->ac.f32, 65535), ""); - val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], - LLVMConstReal(ctx->ac.f32, 0.5), ""); - val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan], - ctx->ac.i32, ""); - } - - args->compr = 1; - args->out[0] = emit_pack_int16(ctx, val[0], val[1]); - args->out[1] = emit_pack_int16(ctx, val[2], val[3]); + packf = ac_build_cvt_pknorm_u16; break; case V_028714_SPI_SHADER_SNORM16_ABGR: - for (unsigned chan = 0; chan < 4; chan++) { - val[chan] = emit_float_saturate(&ctx->ac, values[chan], -1, 1); - val[chan] = LLVMBuildFMul(ctx->builder, val[chan], - LLVMConstReal(ctx->ac.f32, 32767), ""); - - /* If positive, add 0.5, else add -0.5. */ - val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], - LLVMBuildSelect(ctx->builder, - LLVMBuildFCmp(ctx->builder, LLVMRealOGE, - val[chan], ctx->ac.f32_0, ""), - LLVMConstReal(ctx->ac.f32, 0.5), - LLVMConstReal(ctx->ac.f32, -0.5), ""), ""); - val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->ac.i32, ""); - } - - args->compr = 1; - args->out[0] = emit_pack_int16(ctx, val[0], val[1]); - args->out[1] = emit_pack_int16(ctx, val[2], val[3]); + packf = ac_build_cvt_pknorm_i16; break; - case V_028714_SPI_SHADER_UINT16_ABGR: { - LLVMValueRef max_rgb = LLVMConstInt(ctx->ac.i32, - is_int8 ? 255 : is_int10 ? 1023 : 65535, 0); - LLVMValueRef max_alpha = !is_int10 ? max_rgb : LLVMConstInt(ctx->ac.i32, 3, 0); + case V_028714_SPI_SHADER_UINT16_ABGR: + packi = ac_build_cvt_pk_u16; + break; - for (unsigned chan = 0; chan < 4; chan++) { - val[chan] = ac_to_integer(&ctx->ac, values[chan]); - val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], chan == 3 ? max_alpha : max_rgb); - } + case V_028714_SPI_SHADER_SINT16_ABGR: + packi = ac_build_cvt_pk_i16; + break; - args->compr = 1; - args->out[0] = emit_pack_int16(ctx, val[0], val[1]); - args->out[1] = emit_pack_int16(ctx, val[2], val[3]); + default: + case V_028714_SPI_SHADER_32_ABGR: + memcpy(&args->out[0], values, sizeof(values[0]) * 4); break; } - case V_028714_SPI_SHADER_SINT16_ABGR: { - LLVMValueRef max_rgb = LLVMConstInt(ctx->ac.i32, - is_int8 ? 127 : is_int10 ? 511 : 32767, 0); - LLVMValueRef min_rgb = LLVMConstInt(ctx->ac.i32, - is_int8 ? -128 : is_int10 ? -512 : -32768, 0); - LLVMValueRef max_alpha = !is_int10 ? max_rgb : ctx->ac.i32_1; - LLVMValueRef min_alpha = !is_int10 ? min_rgb : LLVMConstInt(ctx->ac.i32, -2, 0); + /* Pack f16 or norm_i16/u16. */ + if (packf) { + for (chan = 0; chan < 2; chan++) { + LLVMValueRef pack_args[2] = { + values[2 * chan], + values[2 * chan + 1] + }; + LLVMValueRef packed; - /* Clamp. */ - for (unsigned chan = 0; chan < 4; chan++) { - val[chan] = ac_to_integer(&ctx->ac, values[chan]); - val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], chan == 3 ? max_alpha : max_rgb); - val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], chan == 3 ? min_alpha : min_rgb); + packed = packf(&ctx->ac, pack_args); + args->out[chan] = ac_to_float(&ctx->ac, packed); } - - args->compr = 1; - args->out[0] = emit_pack_int16(ctx, val[0], val[1]); - args->out[1] = emit_pack_int16(ctx, val[2], val[3]); - break; + args->compr = 1; /* COMPR flag */ } - default: - case V_028714_SPI_SHADER_32_ABGR: - memcpy(&args->out[0], values, sizeof(values[0]) * 4); - break; + /* Pack i16/u16. */ + if (packi) { + for (chan = 0; chan < 2; chan++) { + LLVMValueRef pack_args[2] = { + ac_to_integer(&ctx->ac, values[2 * chan]), + ac_to_integer(&ctx->ac, values[2 * chan + 1]) + }; + LLVMValueRef packed; + + packed = packi(&ctx->ac, pack_args, + is_int8 ? 8 : is_int10 ? 10 : 16, + chan == 1); + args->out[chan] = ac_to_float(&ctx->ac, packed); + } + args->compr = 1; /* COMPR flag */ } - } else - memcpy(&args->out[0], values, sizeof(values[0]) * 4); + return; + } + + memcpy(&args->out[0], values, sizeof(values[0]) * 4); for (unsigned i = 0; i < 4; ++i) args->out[i] = ac_to_float(&ctx->ac, args->out[i]); } +static void +radv_export_param(struct nir_to_llvm_context *ctx, unsigned index, + LLVMValueRef *values) +{ + struct ac_export_args args; + + si_llvm_init_export_args(ctx, values, + V_008DFC_SQ_EXP_PARAM + index, &args); + ac_build_export(&ctx->ac, &args); +} + +static LLVMValueRef +radv_load_output(struct nir_to_llvm_context *ctx, unsigned index, unsigned chan) +{ + LLVMValueRef output = + ctx->nir->outputs[radeon_llvm_reg_index_soa(index, chan)]; + + return LLVMBuildLoad(ctx->builder, output, ""); +} + static void handle_vs_outputs_post(struct nir_to_llvm_context *ctx, bool export_prim_id, @@ -5979,8 +5919,7 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, i = VARYING_SLOT_CLIP_DIST0; for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++) - slots[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); + slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++) slots[i] = LLVMGetUndef(ctx->ac.f32); @@ -6002,27 +5941,23 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1}; if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) { for (unsigned j = 0; j < 4; j++) - pos_values[j] = LLVMBuildLoad(ctx->builder, - ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], ""); + pos_values[j] = radv_load_output(ctx, VARYING_SLOT_POS, j); } si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]); if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) { outinfo->writes_pointsize = true; - psize_value = LLVMBuildLoad(ctx->builder, - ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], ""); + psize_value = radv_load_output(ctx, VARYING_SLOT_PSIZ, 0); } if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) { outinfo->writes_layer = true; - layer_value = LLVMBuildLoad(ctx->builder, - ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], ""); + layer_value = radv_load_output(ctx, VARYING_SLOT_LAYER, 0); } if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) { outinfo->writes_viewport_index = true; - viewport_index_value = LLVMBuildLoad(ctx->builder, - ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], ""); + viewport_index_value = radv_load_output(ctx, VARYING_SLOT_VIEWPORT, 0); } if (outinfo->writes_pointsize || @@ -6086,50 +6021,31 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, if (!(ctx->output_mask & (1ull << i))) continue; - for (unsigned j = 0; j < 4; j++) - values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); - - if (i == VARYING_SLOT_LAYER) { - target = V_008DFC_SQ_EXP_PARAM + param_count; - outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count; - param_count++; - } else if (i == VARYING_SLOT_PRIMITIVE_ID) { - target = V_008DFC_SQ_EXP_PARAM + param_count; - outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count; - param_count++; - } else if (i >= VARYING_SLOT_VAR0) { - outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0); - target = V_008DFC_SQ_EXP_PARAM + param_count; - outinfo->vs_output_param_offset[i] = param_count; - param_count++; - } else + if (i != VARYING_SLOT_LAYER && + i != VARYING_SLOT_PRIMITIVE_ID && + i < VARYING_SLOT_VAR0) continue; - si_llvm_init_export_args(ctx, values, target, &args); + for (unsigned j = 0; j < 4; j++) + values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); - if (target >= V_008DFC_SQ_EXP_POS && - target <= (V_008DFC_SQ_EXP_POS + 3)) { - memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], - &args, sizeof(args)); - } else { - ac_build_export(&ctx->ac, &args); - } + radv_export_param(ctx, param_count, values); + + outinfo->vs_output_param_offset[i] = param_count++; } if (export_prim_id) { LLVMValueRef values[4]; - target = V_008DFC_SQ_EXP_PARAM + param_count; - outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count; - param_count++; values[0] = ctx->vs_prim_id; ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt); for (unsigned j = 1; j < 4; j++) values[j] = ctx->ac.f32_0; - si_llvm_init_export_args(ctx, values, target, &args); - ac_build_export(&ctx->ac, &args); + + radv_export_param(ctx, param_count, values); + + outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++; outinfo->export_prim_id = true; } @@ -6488,12 +6404,12 @@ handle_tcs_outputs_post(struct nir_to_llvm_context *ctx) static bool si_export_mrt_color(struct nir_to_llvm_context *ctx, - LLVMValueRef *color, unsigned param, bool is_last, + LLVMValueRef *color, unsigned index, bool is_last, struct ac_export_args *args) { /* Export */ - si_llvm_init_export_args(ctx, color, param, - args); + si_llvm_init_export_args(ctx, color, + V_008DFC_SQ_EXP_MRT + index, args); if (is_last) { args->valid_mask = 1; /* whether the EXEC mask is valid */ @@ -6525,45 +6441,52 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx) for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { LLVMValueRef values[4]; + bool last = false; if (!(ctx->output_mask & (1ull << i))) continue; - if (i == FRAG_RESULT_DEPTH) { - ctx->shader_info->fs.writes_z = true; - depth = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); - } else if (i == FRAG_RESULT_STENCIL) { - ctx->shader_info->fs.writes_stencil = true; - stencil = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); - } else if (i == FRAG_RESULT_SAMPLE_MASK) { - ctx->shader_info->fs.writes_sample_mask = true; - samplemask = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); - } else { - bool last = false; - for (unsigned j = 0; j < 4; j++) - values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); + if (i < FRAG_RESULT_DATA0) + continue; - if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask) - last = ctx->output_mask <= ((1ull << (i + 1)) - 1); + for (unsigned j = 0; j < 4; j++) + values[j] = ac_to_float(&ctx->ac, + radv_load_output(ctx, i, j)); - bool ret = si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + (i - FRAG_RESULT_DATA0), last, &color_args[index]); - if (ret) - index++; - } + if (!ctx->shader_info->info.ps.writes_z && + !ctx->shader_info->info.ps.writes_stencil && + !ctx->shader_info->info.ps.writes_sample_mask) + last = ctx->output_mask <= ((1ull << (i + 1)) - 1); + + bool ret = si_export_mrt_color(ctx, values, + i - FRAG_RESULT_DATA0, + last, &color_args[index]); + if (ret) + index++; + } + + /* Process depth, stencil, samplemask. */ + if (ctx->shader_info->info.ps.writes_z) { + depth = ac_to_float(&ctx->ac, + radv_load_output(ctx, FRAG_RESULT_DEPTH, 0)); + } + if (ctx->shader_info->info.ps.writes_stencil) { + stencil = ac_to_float(&ctx->ac, + radv_load_output(ctx, FRAG_RESULT_STENCIL, 0)); + } + if (ctx->shader_info->info.ps.writes_sample_mask) { + samplemask = ac_to_float(&ctx->ac, + radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0)); } + /* Export PS outputs. */ for (unsigned i = 0; i < index; i++) ac_build_export(&ctx->ac, &color_args[i]); + if (depth || stencil || samplemask) radv_export_mrt_z(ctx, depth, stencil, samplemask); - else if (!index) { - si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, &color_args[0]); - ac_build_export(&ctx->ac, &color_args[0]); - } + else if (!index) + ac_build_export_null(&ctx->ac); } static void @@ -6784,8 +6707,9 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, visit_cf_list(&ctx, &func->impl->body); phi_post_pass(&ctx); - ctx.abi->emit_outputs(ctx.abi, RADEON_LLVM_MAX_OUTPUTS, - ctx.outputs); + if (nir->info.stage != MESA_SHADER_COMPUTE) + ctx.abi->emit_outputs(ctx.abi, RADEON_LLVM_MAX_OUTPUTS, + ctx.outputs); free(ctx.locals); ralloc_free(ctx.defs); @@ -6898,6 +6822,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, } } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) { shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard; + ctx.abi.lookup_interp_param = lookup_interp_param; + ctx.abi.load_sample_position = load_sample_position; } if (i) @@ -7174,16 +7100,9 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm, static void ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) { - LLVMValueRef args[9]; - args[0] = ctx->gsvs_ring; - args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, LLVMConstInt(ctx->ac.i32, 4, false), ""); - args[3] = ctx->ac.i32_0; - args[4] = ctx->ac.i32_1; /* OFFEN */ - args[5] = ctx->ac.i32_0; /* IDXEN */ - args[6] = ctx->ac.i32_1; /* GLC */ - args[7] = ctx->ac.i32_1; /* SLC */ - args[8] = ctx->ac.i32_0; /* TFE */ - + LLVMValueRef vtx_offset = + LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, + LLVMConstInt(ctx->ac.i32, 4, false), ""); int idx = 0; for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { @@ -7201,16 +7120,16 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) } for (unsigned j = 0; j < length; j++) { - LLVMValueRef value; - args[2] = LLVMConstInt(ctx->ac.i32, + LLVMValueRef value, soffset; + + soffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices * 16 * 4, false); - value = ac_build_intrinsic(&ctx->ac, - "llvm.SI.buffer.load.dword.i32.i32", - ctx->ac.i32, args, 9, - AC_FUNC_ATTR_READONLY | - AC_FUNC_ATTR_LEGACY); + value = ac_build_buffer_load(&ctx->ac, ctx->gsvs_ring, + 1, ctx->ac.i32_0, + vtx_offset, soffset, + 0, 1, 1, true, false); LLVMBuildStore(ctx->builder, ac_to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);