X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_nir_to_llvm.c;h=ecddb5e9b9b10923e4223db8c0df6b5f72f74605;hb=4ba201ee361a07a084665e022efd6b0d6df2ffd6;hp=9a69066afa209c40f936384fb7097115fbde3ed2;hpb=91dd2ca99fee79e110d22f3a238a70a4af789075;p=mesa.git diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 9a69066afa2..ecddb5e9b9b 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -30,6 +30,7 @@ #include "../vulkan/radv_descriptor_set.h" #include "util/bitscan.h" #include +#include "ac_shader_abi.h" #include "ac_shader_info.h" #include "ac_exp_param.h" @@ -46,17 +47,37 @@ enum radeon_llvm_calling_convention { #define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1) #define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) -enum desc_type { - DESC_IMAGE, - DESC_FMASK, - DESC_SAMPLER, - DESC_BUFFER, +struct nir_to_llvm_context; + +struct ac_nir_context { + struct ac_llvm_context ac; + struct ac_shader_abi *abi; + + gl_shader_stage stage; + + struct hash_table *defs; + struct hash_table *phis; + struct hash_table *vars; + + LLVMValueRef main_function; + LLVMBasicBlockRef continue_block; + LLVMBasicBlockRef break_block; + + LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; + + int num_locals; + LLVMValueRef *locals; + + struct nir_to_llvm_context *nctx; /* TODO get rid of this */ }; struct nir_to_llvm_context { struct ac_llvm_context ac; const struct ac_nir_compiler_options *options; struct ac_shader_variant_info *shader_info; + struct ac_shader_abi abi; + struct ac_nir_context *nir; + unsigned max_workgroup_size; LLVMContextRef context; LLVMModuleRef module; @@ -65,7 +86,6 @@ struct nir_to_llvm_context { struct hash_table *defs; struct hash_table *phis; - struct hash_table *vars; LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; LLVMValueRef ring_offsets; @@ -76,13 +96,8 @@ struct nir_to_llvm_context { LLVMValueRef tg_size; LLVMValueRef vertex_buffers; - LLVMValueRef base_vertex; - LLVMValueRef start_instance; - LLVMValueRef draw_index; - LLVMValueRef vertex_id; LLVMValueRef rel_auto_id; LLVMValueRef vs_prim_id; - LLVMValueRef instance_id; LLVMValueRef ls_out_layout; LLVMValueRef es2gs_offset; @@ -120,9 +135,6 @@ struct nir_to_llvm_context { LLVMValueRef sample_coverage; LLVMValueRef frag_pos[4]; - LLVMBasicBlockRef continue_block; - LLVMBasicBlockRef break_block; - LLVMTypeRef i1; LLVMTypeRef i8; LLVMTypeRef i16; @@ -153,12 +165,9 @@ struct nir_to_llvm_context { LLVMValueRef lds; LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; - LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; uint64_t input_mask; uint64_t output_mask; - int num_locals; - LLVMValueRef *locals; uint8_t num_output_clips; uint8_t num_output_culls; @@ -173,9 +182,18 @@ struct nir_to_llvm_context { uint64_t tess_patch_outputs_written; }; -static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, +static inline struct nir_to_llvm_context * +nir_to_llvm_context_from_abi(struct ac_shader_abi *abi) +{ + struct nir_to_llvm_context *ctx = NULL; + return container_of(abi, ctx, abi); +} + +static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, const nir_deref_var *deref, - enum desc_type desc_type); + enum ac_descriptor_type desc_type, + bool image, bool write); + static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan) { return (index * 4) + chan; @@ -727,20 +745,20 @@ static void create_function(struct nir_to_llvm_context *ctx) if (!ctx->is_gs_copy_shader) { if (ctx->shader_info->info.vs.has_vertex_buffers) add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->vertex_buffers); /* vertex buffers */ - add_user_sgpr_argument(&args, ctx->i32, &ctx->base_vertex); // base vertex - add_user_sgpr_argument(&args, ctx->i32, &ctx->start_instance);// start instance + add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.base_vertex); // base vertex + add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.start_instance);// start instance if (ctx->shader_info->info.vs.needs_draw_id) - add_user_sgpr_argument(&args, ctx->i32, &ctx->draw_index); // draw id + add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.draw_id); // draw id } if (ctx->options->key.vs.as_es) add_sgpr_argument(&args, ctx->i32, &ctx->es2gs_offset); // es2gs offset else if (ctx->options->key.vs.as_ls) add_user_sgpr_argument(&args, ctx->i32, &ctx->ls_out_layout); // ls out layout - add_vgpr_argument(&args, ctx->i32, &ctx->vertex_id); // vertex id + add_vgpr_argument(&args, ctx->i32, &ctx->abi.vertex_id); // vertex id if (!ctx->is_gs_copy_shader) { add_vgpr_argument(&args, ctx->i32, &ctx->rel_auto_id); // rel auto id add_vgpr_argument(&args, ctx->i32, &ctx->vs_prim_id); // vs prim id - add_vgpr_argument(&args, ctx->i32, &ctx->instance_id); // instance id + add_vgpr_argument(&args, ctx->i32, &ctx->abi.instance_id); // instance id } break; case MESA_SHADER_TESS_CTRL: @@ -957,7 +975,7 @@ static int get_llvm_num_components(LLVMValueRef value) return num_components; } -static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx, +static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac, LLVMValueRef value, int index) { @@ -967,11 +985,11 @@ static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx, if (count == 1) return value; - return LLVMBuildExtractElement(ctx->builder, value, - LLVMConstInt(ctx->i32, index, false), ""); + return LLVMBuildExtractElement(ac->builder, value, + LLVMConstInt(ac->i32, index, false), ""); } -static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx, +static LLVMValueRef trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count) { unsigned num_components = get_llvm_num_components(value); @@ -991,13 +1009,13 @@ static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx, } static void -build_store_values_extended(struct nir_to_llvm_context *ctx, +build_store_values_extended(struct ac_llvm_context *ac, LLVMValueRef *values, unsigned value_count, unsigned value_stride, LLVMValueRef vec) { - LLVMBuilderRef builder = ctx->builder; + LLVMBuilderRef builder = ac->builder; unsigned i; if (value_count == 1) { @@ -1007,38 +1025,38 @@ build_store_values_extended(struct nir_to_llvm_context *ctx, for (i = 0; i < value_count; i++) { LLVMValueRef ptr = values[i * value_stride]; - LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); + LLVMValueRef index = LLVMConstInt(ac->i32, i, false); LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, ""); LLVMBuildStore(builder, value, ptr); } } -static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx, +static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_ssa_def *def) { - LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size); + LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size); if (def->num_components > 1) { type = LLVMVectorType(type, def->num_components); } return type; } -static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src) +static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src) { assert(src.is_ssa); - struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa); + struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa); return (LLVMValueRef)entry->data; } -static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx, +static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, const struct nir_block *b) { - struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b); + struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b); return (LLVMBasicBlockRef)entry->data; } -static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx, +static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, nir_alu_src src, unsigned num_components) { @@ -1059,20 +1077,20 @@ static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx, if (need_swizzle || num_components != src_components) { LLVMValueRef masks[] = { - LLVMConstInt(ctx->i32, src.swizzle[0], false), - LLVMConstInt(ctx->i32, src.swizzle[1], false), - LLVMConstInt(ctx->i32, src.swizzle[2], false), - LLVMConstInt(ctx->i32, src.swizzle[3], false)}; + LLVMConstInt(ctx->ac.i32, src.swizzle[0], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[1], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[2], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)}; if (src_components > 1 && num_components == 1) { - value = LLVMBuildExtractElement(ctx->builder, value, + value = LLVMBuildExtractElement(ctx->ac.builder, value, masks[0], ""); } else if (src_components == 1 && num_components > 1) { LLVMValueRef values[] = {value, value, value, value}; value = ac_build_gather_values(&ctx->ac, values, num_components); } else { LLVMValueRef swizzle = LLVMConstVector(masks, num_components); - value = LLVMBuildShuffleVector(ctx->builder, value, value, + value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, swizzle, ""); } } @@ -1490,7 +1508,7 @@ static LLVMValueRef emit_ddxy_interp( return ac_build_gather_values(&ctx->ac, result, 4); } -static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *instr) +static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) { LLVMValueRef src[4], result = NULL; unsigned num_components = instr->dest.dest.ssa.num_components; @@ -1524,38 +1542,38 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst break; case nir_op_fneg: src[0] = to_float(&ctx->ac, src[0]); - result = LLVMBuildFNeg(ctx->builder, src[0], ""); + result = LLVMBuildFNeg(ctx->ac.builder, src[0], ""); break; case nir_op_ineg: - result = LLVMBuildNeg(ctx->builder, src[0], ""); + result = LLVMBuildNeg(ctx->ac.builder, src[0], ""); break; case nir_op_inot: - result = LLVMBuildNot(ctx->builder, src[0], ""); + result = LLVMBuildNot(ctx->ac.builder, src[0], ""); break; case nir_op_iadd: - result = LLVMBuildAdd(ctx->builder, src[0], src[1], ""); + result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_fadd: src[0] = to_float(&ctx->ac, src[0]); src[1] = to_float(&ctx->ac, src[1]); - result = LLVMBuildFAdd(ctx->builder, src[0], src[1], ""); + result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_fsub: src[0] = to_float(&ctx->ac, src[0]); src[1] = to_float(&ctx->ac, src[1]); - result = LLVMBuildFSub(ctx->builder, src[0], src[1], ""); + result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_isub: - result = LLVMBuildSub(ctx->builder, src[0], src[1], ""); + result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_imul: - result = LLVMBuildMul(ctx->builder, src[0], src[1], ""); + result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_imod: - result = LLVMBuildSRem(ctx->builder, src[0], src[1], ""); + result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_umod: - result = LLVMBuildURem(ctx->builder, src[0], src[1], ""); + result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_fmod: src[0] = to_float(&ctx->ac, src[0]); @@ -1563,27 +1581,27 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst result = ac_build_fdiv(&ctx->ac, src[0], src[1]); result = emit_intrin_1f_param(&ctx->ac, "llvm.floor", to_float_type(&ctx->ac, def_type), result); - result = LLVMBuildFMul(ctx->builder, src[1] , result, ""); - result = LLVMBuildFSub(ctx->builder, src[0], result, ""); + result = LLVMBuildFMul(ctx->ac.builder, src[1] , result, ""); + result = LLVMBuildFSub(ctx->ac.builder, src[0], result, ""); break; case nir_op_frem: src[0] = to_float(&ctx->ac, src[0]); src[1] = to_float(&ctx->ac, src[1]); - result = LLVMBuildFRem(ctx->builder, src[0], src[1], ""); + result = LLVMBuildFRem(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_irem: - result = LLVMBuildSRem(ctx->builder, src[0], src[1], ""); + result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_idiv: - result = LLVMBuildSDiv(ctx->builder, src[0], src[1], ""); + result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_udiv: - result = LLVMBuildUDiv(ctx->builder, src[0], src[1], ""); + result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_fmul: src[0] = to_float(&ctx->ac, src[0]); src[1] = to_float(&ctx->ac, src[1]); - result = LLVMBuildFMul(ctx->builder, src[0], src[1], ""); + result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_fdiv: src[0] = to_float(&ctx->ac, src[0]); @@ -1592,32 +1610,32 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst break; case nir_op_frcp: src[0] = to_float(&ctx->ac, src[0]); - result = ac_build_fdiv(&ctx->ac, ctx->f32one, src[0]); + result = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, src[0]); break; case nir_op_iand: - result = LLVMBuildAnd(ctx->builder, src[0], src[1], ""); + result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_ior: - result = LLVMBuildOr(ctx->builder, src[0], src[1], ""); + result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_ixor: - result = LLVMBuildXor(ctx->builder, src[0], src[1], ""); + result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_ishl: - result = LLVMBuildShl(ctx->builder, src[0], - LLVMBuildZExt(ctx->builder, src[1], + result = LLVMBuildShl(ctx->ac.builder, src[0], + LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), ""), ""); break; case nir_op_ishr: - result = LLVMBuildAShr(ctx->builder, src[0], - LLVMBuildZExt(ctx->builder, src[1], + result = LLVMBuildAShr(ctx->ac.builder, src[0], + LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), ""), ""); break; case nir_op_ushr: - result = LLVMBuildLShr(ctx->builder, src[0], - LLVMBuildZExt(ctx->builder, src[1], + result = LLVMBuildLShr(ctx->ac.builder, src[0], + LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), ""), ""); break; @@ -1719,7 +1737,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst case nir_op_frsq: result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", to_float_type(&ctx->ac, def_type), src[0]); - result = ac_build_fdiv(&ctx->ac, ctx->f32one, result); + result = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, result); break; case nir_op_fpow: result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", @@ -1755,10 +1773,10 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]); break; case nir_op_bitfield_reverse: - result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); + result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_bit_count: - result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); + result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_vec2: case nir_op_vec3: @@ -1770,40 +1788,40 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst case nir_op_f2i32: case nir_op_f2i64: src[0] = to_float(&ctx->ac, src[0]); - result = LLVMBuildFPToSI(ctx->builder, src[0], def_type, ""); + result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, ""); break; case nir_op_f2u32: case nir_op_f2u64: src[0] = to_float(&ctx->ac, src[0]); - result = LLVMBuildFPToUI(ctx->builder, src[0], def_type, ""); + result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, ""); break; case nir_op_i2f32: case nir_op_i2f64: - result = LLVMBuildSIToFP(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), ""); + result = LLVMBuildSIToFP(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), ""); break; case nir_op_u2f32: case nir_op_u2f64: - result = LLVMBuildUIToFP(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), ""); + result = LLVMBuildUIToFP(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), ""); break; case nir_op_f2f64: - result = LLVMBuildFPExt(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), ""); + result = LLVMBuildFPExt(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), ""); break; case nir_op_f2f32: - result = LLVMBuildFPTrunc(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), ""); + result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), ""); break; case nir_op_u2u32: case nir_op_u2u64: if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type)) - result = LLVMBuildZExt(ctx->builder, src[0], def_type, ""); + result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, ""); else - result = LLVMBuildTrunc(ctx->builder, src[0], def_type, ""); + result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); break; case nir_op_i2i32: case nir_op_i2i64: if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type)) - result = LLVMBuildSExt(ctx->builder, src[0], def_type, ""); + result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, ""); else - result = LLVMBuildTrunc(ctx->builder, src[0], def_type, ""); + result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); break; case nir_op_bcsel: result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]); @@ -1836,7 +1854,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst result = emit_i2b(&ctx->ac, src[0]); break; case nir_op_fquantize2f16: - result = emit_f2f16(ctx, src[0]); + result = emit_f2f16(ctx->nctx, src[0]); break; case nir_op_umul_high: result = emit_umul_high(&ctx->ac, src[0], src[1]); @@ -1856,36 +1874,36 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst case nir_op_fddy_fine: case nir_op_fddx_coarse: case nir_op_fddy_coarse: - result = emit_ddxy(ctx, instr->op, src[0]); + result = emit_ddxy(ctx->nctx, instr->op, src[0]); break; case nir_op_unpack_64_2x32_split_x: { assert(instr->src[0].src.ssa->num_components == 1); - LLVMValueRef tmp = LLVMBuildBitCast(ctx->builder, src[0], - LLVMVectorType(ctx->i32, 2), + LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], + LLVMVectorType(ctx->ac.i32, 2), ""); - result = LLVMBuildExtractElement(ctx->builder, tmp, - ctx->i32zero, ""); + result = LLVMBuildExtractElement(ctx->ac.builder, tmp, + ctx->ac.i32_0, ""); break; } case nir_op_unpack_64_2x32_split_y: { assert(instr->src[0].src.ssa->num_components == 1); - LLVMValueRef tmp = LLVMBuildBitCast(ctx->builder, src[0], - LLVMVectorType(ctx->i32, 2), + LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], + LLVMVectorType(ctx->ac.i32, 2), ""); - result = LLVMBuildExtractElement(ctx->builder, tmp, - ctx->i32one, ""); + result = LLVMBuildExtractElement(ctx->ac.builder, tmp, + ctx->ac.i32_0, ""); break; } case nir_op_pack_64_2x32_split: { - LLVMValueRef tmp = LLVMGetUndef(LLVMVectorType(ctx->i32, 2)); - tmp = LLVMBuildInsertElement(ctx->builder, tmp, - src[0], ctx->i32zero, ""); - tmp = LLVMBuildInsertElement(ctx->builder, tmp, - src[1], ctx->i32one, ""); - result = LLVMBuildBitCast(ctx->builder, tmp, ctx->i64, ""); + LLVMValueRef tmp = LLVMGetUndef(LLVMVectorType(ctx->ac.i32, 2)); + tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, + src[0], ctx->ac.i32_0, ""); + tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, + src[1], ctx->ac.i32_1, ""); + result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, ""); break; } @@ -1904,12 +1922,12 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst } } -static void visit_load_const(struct nir_to_llvm_context *ctx, +static void visit_load_const(struct ac_nir_context *ctx, const nir_load_const_instr *instr) { LLVMValueRef values[4], value = NULL; LLVMTypeRef element_type = - LLVMIntTypeInContext(ctx->context, instr->def.bit_size); + LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); for (unsigned i = 0; i < instr->def.num_components; ++i) { switch (instr->def.bit_size) { @@ -1945,27 +1963,27 @@ static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr, } static LLVMValueRef -get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements) +get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements) { LLVMValueRef size = - LLVMBuildExtractElement(ctx->builder, descriptor, - LLVMConstInt(ctx->i32, 2, false), ""); + LLVMBuildExtractElement(ctx->ac.builder, descriptor, + LLVMConstInt(ctx->ac.i32, 2, false), ""); /* VI only */ - if (ctx->options->chip_class >= VI && in_elements) { + if (ctx->abi->chip_class >= VI && in_elements) { /* On VI, the descriptor contains the size in bytes, * but TXQ must return the size in elements. * The stride is always non-zero for resources using TXQ. */ LLVMValueRef stride = - LLVMBuildExtractElement(ctx->builder, descriptor, - LLVMConstInt(ctx->i32, 1, false), ""); - stride = LLVMBuildLShr(ctx->builder, stride, - LLVMConstInt(ctx->i32, 16, false), ""); - stride = LLVMBuildAnd(ctx->builder, stride, - LLVMConstInt(ctx->i32, 0x3fff, false), ""); + LLVMBuildExtractElement(ctx->ac.builder, descriptor, + LLVMConstInt(ctx->ac.i32, 1, false), ""); + stride = LLVMBuildLShr(ctx->ac.builder, stride, + LLVMConstInt(ctx->ac.i32, 16, false), ""); + stride = LLVMBuildAnd(ctx->ac.builder, stride, + LLVMConstInt(ctx->ac.i32, 0x3fff, false), ""); - size = LLVMBuildUDiv(ctx->builder, size, stride, ""); + size = LLVMBuildUDiv(ctx->ac.builder, size, stride, ""); } return size; } @@ -1987,7 +2005,7 @@ static void build_int_type_name( strcpy(buf, "i32"); } -static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, +static LLVMValueRef radv_lower_gather4_integer(struct ac_llvm_context *ctx, struct ac_image_args *args, const nir_tex_instr *instr) { @@ -2006,15 +2024,15 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; txq_args.opcode = ac_image_get_resinfo; txq_args.dmask = 0xf; - txq_args.addr = ctx->i32zero; + txq_args.addr = ctx->i32_0; txq_args.resource = args->resource; - LLVMValueRef size = ac_build_image_opcode(&ctx->ac, &txq_args); + LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args); for (c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, false), ""); half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); - half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->f32one, half_texel[c]); + half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]); half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), ""); } @@ -2050,11 +2068,11 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, /* workaround 8/8/8/8 uint/sint cube gather bug */ /* first detect it then change to a scaled read and f2i */ - tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32one, ""); + tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, ""); tmp2 = tmp; /* extract the DATA_FORMAT */ - tmp = ac_build_bfe(&ctx->ac, tmp, LLVMConstInt(ctx->i32, 20, false), + tmp = ac_build_bfe(ctx, tmp, LLVMConstInt(ctx->i32, 20, false), LLVMConstInt(ctx->i32, 6, false), false); /* is the DATA_FORMAT == 8_8_8_8 */ @@ -2073,13 +2091,13 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), ""); tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, ""); - args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32one, ""); + args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, ""); /* don't modify the coordinates for this case */ coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, ""); } args->addr = coord; - result = ac_build_image_opcode(&ctx->ac, args); + result = ac_build_image_opcode(ctx, args); if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { LLVMValueRef tmp, tmp2; @@ -2101,7 +2119,7 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, return result; } -static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, +static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_tex_instr *instr, bool lod_is_zero, struct ac_image_args *args) @@ -2110,7 +2128,7 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, return ac_build_buffer_load_format(&ctx->ac, args->resource, args->addr, - LLVMConstInt(ctx->i32, 0, false), + LLVMConstInt(ctx->ac.i32, 0, false), true); } @@ -2161,7 +2179,7 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, if (instr->op == nir_texop_tg4) { enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { - return radv_lower_gather4_integer(ctx, args, instr); + return radv_lower_gather4_integer(&ctx->ac, args, instr); } } return ac_build_image_opcode(&ctx->ac, args); @@ -2170,7 +2188,7 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { - LLVMValueRef index = get_src(ctx, instr->src[0]); + LLVMValueRef index = get_src(ctx->nir, instr->src[0]); unsigned desc_set = nir_intrinsic_desc_set(instr); unsigned binding = nir_intrinsic_binding(instr); LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; @@ -2206,15 +2224,15 @@ static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx, LLVMValueRef ptr, addr; addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0); - addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), ""); + addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx->nir, instr->src[0]), ""); ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr); - ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa)); + ptr = cast_ptr(ctx, ptr, get_def_type(ctx->nir, &instr->dest.ssa)); return LLVMBuildLoad(ctx->builder, ptr, ""); } -static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx, +static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { LLVMValueRef desc = get_src(ctx, instr->src[0]); @@ -2225,7 +2243,7 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { const char *store_name; - LLVMValueRef src_data = get_src(ctx, instr->src[0]); + LLVMValueRef src_data = get_src(ctx->nir, instr->src[0]); LLVMTypeRef data_type = ctx->f32; int elem_size_mult = get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32; int components_32bit = elem_size_mult * instr->num_components; @@ -2236,7 +2254,7 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, if (ctx->stage == MESA_SHADER_FRAGMENT) ctx->shader_info->fs.writes_memory = true; - params[1] = get_src(ctx, instr->src[1]); + params[1] = get_src(ctx->nir, instr->src[1]); params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */ params[4] = ctx->i1false; /* glc */ params[5] = ctx->i1false; /* slc */ @@ -2245,10 +2263,10 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, data_type = LLVMVectorType(ctx->f32, components_32bit); base_data = to_float(&ctx->ac, src_data); - base_data = trim_vector(ctx, base_data, instr->num_components); + base_data = trim_vector(&ctx->ac, base_data, instr->num_components); base_data = LLVMBuildBitCast(ctx->builder, base_data, data_type, ""); - base_offset = get_src(ctx, instr->src[2]); /* voffset */ + base_offset = get_src(ctx->nir, instr->src[2]); /* voffset */ while (writemask) { int start, count; LLVMValueRef data; @@ -2317,12 +2335,12 @@ static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx, ctx->shader_info->fs.writes_memory = true; if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { - params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0); + params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx->nir, instr->src[3]), 0); } - params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0); - params[arg_count++] = get_src(ctx, instr->src[0]); + params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx->nir, instr->src[2]), 0); + params[arg_count++] = get_src(ctx->nir, instr->src[0]); params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */ - params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ + params[arg_count++] = get_src(ctx->nir, instr->src[1]); /* voffset */ params[arg_count++] = ctx->i1false; /* slc */ switch (instr->intrinsic) { @@ -2377,7 +2395,7 @@ static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, const char *load_name; LLVMTypeRef data_type = ctx->f32; LLVMValueRef offset = LLVMConstInt(ctx->i32, i * 4, false); - offset = LLVMBuildAdd(ctx->builder, get_src(ctx, instr->src[1]), offset, ""); + offset = LLVMBuildAdd(ctx->builder, get_src(ctx->nir, instr->src[1]), offset, ""); if (load_components == 3) data_type = LLVMVectorType(ctx->f32, 4); @@ -2394,7 +2412,7 @@ static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, unreachable("unhandled number of components"); LLVMValueRef params[] = { - get_src(ctx, instr->src[0]), + get_src(ctx->nir, instr->src[0]), LLVMConstInt(ctx->i32, 0, false), offset, ctx->i1false, @@ -2420,10 +2438,10 @@ static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, } return LLVMBuildBitCast(ctx->builder, ret, - get_def_type(ctx, &instr->dest.ssa), ""); + get_def_type(ctx->nir, &instr->dest.ssa), ""); } -static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, +static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { LLVMValueRef results[8], ret; @@ -2431,16 +2449,19 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, LLVMValueRef offset = get_src(ctx, instr->src[1]); int num_components = instr->num_components; + if (ctx->abi->load_ubo) + rsrc = ctx->abi->load_ubo(ctx->abi, rsrc); + if (instr->dest.ssa.bit_size == 64) num_components *= 2; for (unsigned i = 0; i < num_components; ++i) { LLVMValueRef params[] = { rsrc, - LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0), + LLVMBuildAdd(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, 4 * i, 0), offset, "") }; - results[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.load.const.v4i32", ctx->f32, + results[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.load.const.v4i32", ctx->ac.f32, params, 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_LEGACY); @@ -2448,15 +2469,15 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, ret = ac_build_gather_values(&ctx->ac, results, instr->num_components); - return LLVMBuildBitCast(ctx->builder, ret, + return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); } static void -radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref_var *deref, - bool vs_in, unsigned *vertex_index_out, - LLVMValueRef *vertex_index_ref, - unsigned *const_out, LLVMValueRef *indir_out) +get_deref_offset(struct ac_nir_context *ctx, nir_deref_var *deref, + bool vs_in, unsigned *vertex_index_out, + LLVMValueRef *vertex_index_ref, + unsigned *const_out, LLVMValueRef *indir_out) { unsigned const_offset = 0; nir_deref *tail = &deref->deref; @@ -2469,9 +2490,9 @@ radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref_var *deref, *vertex_index_out = deref_array->base_offset; if (vertex_index_ref) { - LLVMValueRef vtx = LLVMConstInt(ctx->i32, deref_array->base_offset, false); + LLVMValueRef vtx = LLVMConstInt(ctx->ac.i32, deref_array->base_offset, false); if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - vtx = LLVMBuildAdd(ctx->builder, vtx, get_src(ctx, deref_array->indirect), ""); + vtx = LLVMBuildAdd(ctx->ac.builder, vtx, get_src(ctx, deref_array->indirect), ""); } *vertex_index_ref = vtx; } @@ -2503,11 +2524,11 @@ radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref_var *deref, assert(deref_array->deref_array_type == nir_deref_array_type_indirect); index = get_src(ctx, deref_array->indirect); - stride = LLVMConstInt(ctx->i32, size, 0); - local_offset = LLVMBuildMul(ctx->builder, stride, index, ""); + stride = LLVMConstInt(ctx->ac.i32, size, 0); + local_offset = LLVMBuildMul(ctx->ac.builder, stride, index, ""); if (offset) - offset = LLVMBuildAdd(ctx->builder, offset, local_offset, ""); + offset = LLVMBuildAdd(ctx->ac.builder, offset, local_offset, ""); else offset = local_offset; } else if (tail->deref_type == nir_deref_type_struct) { @@ -2523,8 +2544,8 @@ radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref_var *deref, } out: if (const_offset && offset) - offset = LLVMBuildAdd(ctx->builder, offset, - LLVMConstInt(ctx->i32, const_offset, 0), + offset = LLVMBuildAdd(ctx->ac.builder, offset, + LLVMConstInt(ctx->ac.i32, const_offset, 0), ""); *const_out = const_offset; @@ -2690,9 +2711,9 @@ load_tcs_input(struct nir_to_llvm_context *ctx, const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); const bool is_compact = instr->variables[0]->var->data.compact; param = shader_io_get_unique_index(instr->variables[0]->var->data.location); - radv_get_deref_offset(ctx, instr->variables[0], - false, NULL, per_vertex ? &vertex_index : NULL, - &const_index, &indir_index); + get_deref_offset(ctx->nir, instr->variables[0], + false, NULL, per_vertex ? &vertex_index : NULL, + &const_index, &indir_index); stride = unpack_param(ctx, ctx->tcs_in_layout, 13, 8); dw_addr = get_tcs_in_current_patch_offset(ctx); @@ -2705,7 +2726,7 @@ load_tcs_input(struct nir_to_llvm_context *ctx, ctx->i32one, ""); } result = ac_build_gather_values(&ctx->ac, value, instr->num_components); - result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), ""); + result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); return result; } @@ -2722,9 +2743,9 @@ load_tcs_output(struct nir_to_llvm_context *ctx, const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); const bool is_compact = instr->variables[0]->var->data.compact; param = shader_io_get_unique_index(instr->variables[0]->var->data.location); - radv_get_deref_offset(ctx, instr->variables[0], - false, NULL, per_vertex ? &vertex_index : NULL, - &const_index, &indir_index); + get_deref_offset(ctx->nir, instr->variables[0], + false, NULL, per_vertex ? &vertex_index : NULL, + &const_index, &indir_index); if (!instr->variables[0]->var->data.patch) { stride = unpack_param(ctx, ctx->tcs_out_layout, 13, 8); @@ -2742,7 +2763,7 @@ load_tcs_output(struct nir_to_llvm_context *ctx, ctx->i32one, ""); } result = ac_build_gather_values(&ctx->ac, value, instr->num_components); - result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), ""); + result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); return result; } @@ -2761,9 +2782,9 @@ store_tcs_output(struct nir_to_llvm_context *ctx, const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); const bool is_compact = instr->variables[0]->var->data.compact; - radv_get_deref_offset(ctx, instr->variables[0], - false, NULL, per_vertex ? &vertex_index : NULL, - &const_index, &indir_index); + get_deref_offset(ctx->nir, instr->variables[0], + false, NULL, per_vertex ? &vertex_index : NULL, + &const_index, &indir_index); param = shader_io_get_unique_index(instr->variables[0]->var->data.location); if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 && @@ -2791,7 +2812,7 @@ store_tcs_output(struct nir_to_llvm_context *ctx, bool is_tess_factor = false; if (!(writemask & (1 << chan))) continue; - LLVMValueRef value = llvm_extract_elem(ctx, src, chan); + LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan); lds_store(ctx, dw_addr, value); @@ -2828,9 +2849,9 @@ load_tes_input(struct nir_to_llvm_context *ctx, const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); const bool is_compact = instr->variables[0]->var->data.compact; - radv_get_deref_offset(ctx, instr->variables[0], - false, NULL, per_vertex ? &vertex_index : NULL, - &const_index, &indir_index); + get_deref_offset(ctx->nir, instr->variables[0], + false, NULL, per_vertex ? &vertex_index : NULL, + &const_index, &indir_index); param = shader_io_get_unique_index(instr->variables[0]->var->data.location); if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { @@ -2842,8 +2863,8 @@ load_tes_input(struct nir_to_llvm_context *ctx, result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL, buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false); - result = trim_vector(ctx, result, instr->num_components); - result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), ""); + result = trim_vector(&ctx->ac, result, instr->num_components); + result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); return result; } @@ -2857,9 +2878,9 @@ load_gs_input(struct nir_to_llvm_context *ctx, unsigned param, vtx_offset_param; LLVMValueRef value[4], result; unsigned vertex_index; - radv_get_deref_offset(ctx, instr->variables[0], - false, &vertex_index, NULL, - &const_index, &indir_index); + get_deref_offset(ctx->nir, instr->variables[0], + false, &vertex_index, NULL, + &const_index, &indir_index); vtx_offset_param = vertex_index; assert(vtx_offset_param < 6); vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param], @@ -2889,7 +2910,7 @@ load_gs_input(struct nir_to_llvm_context *ctx, } static LLVMValueRef -build_gep_for_deref(struct nir_to_llvm_context *ctx, +build_gep_for_deref(struct ac_nir_context *ctx, nir_deref_var *deref) { struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var); @@ -2901,10 +2922,10 @@ build_gep_for_deref(struct nir_to_llvm_context *ctx, switch (tail->deref_type) { case nir_deref_type_array: { nir_deref_array *array = nir_deref_as_array(tail); - offset = LLVMConstInt(ctx->i32, array->base_offset, 0); + offset = LLVMConstInt(ctx->ac.i32, array->base_offset, 0); if (array->deref_array_type == nir_deref_array_type_indirect) { - offset = LLVMBuildAdd(ctx->builder, offset, + offset = LLVMBuildAdd(ctx->ac.builder, offset, get_src(ctx, array->indirect), ""); @@ -2914,7 +2935,7 @@ build_gep_for_deref(struct nir_to_llvm_context *ctx, case nir_deref_type_struct: { nir_deref_struct *deref_struct = nir_deref_as_struct(tail); - offset = LLVMConstInt(ctx->i32, + offset = LLVMConstInt(ctx->ac.i32, deref_struct->index, 0); break; } @@ -2927,7 +2948,7 @@ build_gep_for_deref(struct nir_to_llvm_context *ctx, return val; } -static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, +static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef values[8]; @@ -2938,7 +2959,7 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, unsigned const_index; bool vs_in = ctx->stage == MESA_SHADER_VERTEX && instr->variables[0]->var->data.mode == nir_var_shader_in; - radv_get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL, + get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL, &const_index, &indir_index); if (instr->dest.ssa.bit_size == 64) @@ -2947,11 +2968,11 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, switch (instr->variables[0]->var->data.mode) { case nir_var_shader_in: if (ctx->stage == MESA_SHADER_TESS_CTRL) - return load_tcs_input(ctx, instr); + return load_tcs_input(ctx->nctx, instr); if (ctx->stage == MESA_SHADER_TESS_EVAL) - return load_tes_input(ctx, instr); + return load_tes_input(ctx->nctx, instr); if (ctx->stage == MESA_SHADER_GEOMETRY) { - return load_gs_input(ctx, instr); + return load_gs_input(ctx->nctx, instr); } for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { @@ -2960,14 +2981,14 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, ctx->stage == MESA_SHADER_VERTEX); count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->inputs + idx + chan, count, + &ctx->ac, ctx->abi->inputs + idx + chan, count, 4, false); - values[chan] = LLVMBuildExtractElement(ctx->builder, + values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); } else - values[chan] = ctx->inputs[idx + chan + const_index * 4]; + values[chan] = ctx->abi->inputs[idx + chan + const_index * 4]; } break; case nir_var_local: @@ -2980,25 +3001,25 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, &ctx->ac, ctx->locals + idx + chan, count, 4, true); - values[chan] = LLVMBuildExtractElement(ctx->builder, + values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); } else { - values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], ""); + values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * 4], ""); } } break; case nir_var_shared: { LLVMValueRef address = build_gep_for_deref(ctx, instr->variables[0]); - LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, ""); - return LLVMBuildBitCast(ctx->builder, val, + LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, ""); + return LLVMBuildBitCast(ctx->ac.builder, val, get_def_type(ctx, &instr->dest.ssa), ""); } case nir_var_shader_out: if (ctx->stage == MESA_SHADER_TESS_CTRL) - return load_tcs_output(ctx, instr); + return load_tcs_output(ctx->nctx, instr); for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { unsigned count = glsl_count_attribute_slots( @@ -3008,11 +3029,11 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, &ctx->ac, ctx->outputs + idx + chan, count, 4, true); - values[chan] = LLVMBuildExtractElement(ctx->builder, + values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); } else { - values[chan] = LLVMBuildLoad(ctx->builder, + values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->outputs[idx + chan + const_index * 4], ""); } @@ -3022,12 +3043,12 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, unreachable("unhandle variable mode"); } ret = ac_build_gather_values(&ctx->ac, values, ve); - return LLVMBuildBitCast(ctx->builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); + return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); } static void -visit_store_var(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) +visit_store_var(struct ac_nir_context *ctx, + nir_intrinsic_instr *instr) { LLVMValueRef temp_ptr, value; int idx = instr->variables[0]->var->data.driver_location; @@ -3035,14 +3056,14 @@ visit_store_var(struct nir_to_llvm_context *ctx, int writemask = instr->const_index[0]; LLVMValueRef indir_index; unsigned const_index; - radv_get_deref_offset(ctx, instr->variables[0], false, - NULL, NULL, &const_index, &indir_index); + get_deref_offset(ctx, instr->variables[0], false, + NULL, NULL, &const_index, &indir_index); if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) { int old_writemask = writemask; - src = LLVMBuildBitCast(ctx->builder, src, - LLVMVectorType(ctx->f32, get_llvm_num_components(src) * 2), + src = LLVMBuildBitCast(ctx->ac.builder, src, + LLVMVectorType(ctx->ac.f32, get_llvm_num_components(src) * 2), ""); writemask = 0; @@ -3056,7 +3077,7 @@ visit_store_var(struct nir_to_llvm_context *ctx, case nir_var_shader_out: if (ctx->stage == MESA_SHADER_TESS_CTRL) { - store_tcs_output(ctx, instr, src, writemask); + store_tcs_output(ctx->nctx, instr, src, writemask); return; } @@ -3065,7 +3086,7 @@ visit_store_var(struct nir_to_llvm_context *ctx, if (!(writemask & (1 << chan))) continue; - value = llvm_extract_elem(ctx, src, chan); + value = llvm_extract_elem(&ctx->ac, src, chan); if (instr->variables[0]->var->data.compact) stride = 1; @@ -3078,17 +3099,17 @@ visit_store_var(struct nir_to_llvm_context *ctx, stride, true); if (get_llvm_num_components(tmp_vec) > 1) { - tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec, + tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, value, indir_index, ""); } else tmp_vec = value; - build_store_values_extended(ctx, ctx->outputs + idx + chan, + build_store_values_extended(&ctx->ac, ctx->outputs + idx + chan, count, stride, tmp_vec); } else { temp_ptr = ctx->outputs[idx + chan + const_index * stride]; - LLVMBuildStore(ctx->builder, value, temp_ptr); + LLVMBuildStore(ctx->ac.builder, value, temp_ptr); } } break; @@ -3097,7 +3118,7 @@ visit_store_var(struct nir_to_llvm_context *ctx, if (!(writemask & (1 << chan))) continue; - value = llvm_extract_elem(ctx, src, chan); + value = llvm_extract_elem(&ctx->ac, src, chan); if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); @@ -3106,14 +3127,14 @@ visit_store_var(struct nir_to_llvm_context *ctx, &ctx->ac, ctx->locals + idx + chan, count, 4, true); - tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec, + tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, value, indir_index, ""); - build_store_values_extended(ctx, ctx->locals + idx + chan, + build_store_values_extended(&ctx->ac, ctx->locals + idx + chan, count, 4, tmp_vec); } else { temp_ptr = ctx->locals[idx + chan + const_index * 4]; - LLVMBuildStore(ctx->builder, value, temp_ptr); + LLVMBuildStore(ctx->ac.builder, value, temp_ptr); } } break; @@ -3127,22 +3148,22 @@ visit_store_var(struct nir_to_llvm_context *ctx, nir_deref_tail(&instr->variables[0]->deref)->type); if (writemask == (1 << components) - 1) { val = LLVMBuildBitCast( - ctx->builder, val, + ctx->ac.builder, val, LLVMGetElementType(LLVMTypeOf(address)), ""); - LLVMBuildStore(ctx->builder, val, address); + LLVMBuildStore(ctx->ac.builder, val, address); } else { for (unsigned chan = 0; chan < 4; chan++) { if (!(writemask & (1 << chan))) continue; LLVMValueRef ptr = - LLVMBuildStructGEP(ctx->builder, + LLVMBuildStructGEP(ctx->ac.builder, address, chan, ""); - LLVMValueRef src = llvm_extract_elem(ctx, val, + LLVMValueRef src = llvm_extract_elem(&ctx->ac, val, chan); src = LLVMBuildBitCast( - ctx->builder, src, + ctx->ac.builder, src, LLVMGetElementType(LLVMTypeOf(ptr)), ""); - LLVMBuildStore(ctx->builder, src, ptr); + LLVMBuildStore(ctx->ac.builder, src, ptr); } } break; @@ -3193,7 +3214,7 @@ static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array) * The sample index should be adjusted as follows: * sample_index = (fmask >> (sample_index * 4)) & 0xF; */ -static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context *ctx, +static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, LLVMValueRef coord_x, LLVMValueRef coord_y, LLVMValueRef coord_z, LLVMValueRef sample_index, @@ -3215,17 +3236,17 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context * args.da = coord_z ? true : false; args.resource = fmask_desc_ptr; args.dmask = 0xf; - args.addr = ac_build_gather_values(&ctx->ac, fmask_load_address, coord_z ? 4 : 2); + args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2); - res = ac_build_image_opcode(&ctx->ac, &args); + res = ac_build_image_opcode(ctx, &args); - res = to_integer(&ctx->ac, res); + res = to_integer(ctx, res); LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false); LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false); LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder, res, - ctx->i32zero, ""); + ctx->i32_0, ""); LLVMValueRef sample_index4 = LLVMBuildMul(ctx->builder, sample_index, four, ""); @@ -3243,11 +3264,11 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context * LLVMValueRef fmask_word1 = LLVMBuildExtractElement(ctx->builder, fmask_desc, - ctx->i32one, ""); + ctx->i32_1, ""); LLVMValueRef word1_is_nonzero = LLVMBuildICmp(ctx->builder, LLVMIntNE, - fmask_word1, ctx->i32zero, ""); + fmask_word1, ctx->i32_0, ""); /* Replace the MSAA sample index. */ sample_index = @@ -3256,7 +3277,7 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context * return sample_index; } -static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, +static LLVMValueRef get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { const struct glsl_type *type = instr->variables[0]->var->type; @@ -3266,11 +3287,11 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, LLVMValueRef src0 = get_src(ctx, instr->src[0]); LLVMValueRef coords[4]; LLVMValueRef masks[] = { - LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false), - LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false), + LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), + LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), }; LLVMValueRef res; - LLVMValueRef sample_index = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0); + LLVMValueRef sample_index = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0); int count; enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); @@ -3286,26 +3307,26 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, LLVMValueRef fmask_load_address[3]; int chan; - fmask_load_address[0] = LLVMBuildExtractElement(ctx->builder, src0, masks[0], ""); - fmask_load_address[1] = LLVMBuildExtractElement(ctx->builder, src0, masks[1], ""); + fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); + fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], ""); if (glsl_sampler_type_is_array(type)) - fmask_load_address[2] = LLVMBuildExtractElement(ctx->builder, src0, masks[2], ""); + fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], ""); else fmask_load_address[2] = NULL; if (add_frag_pos) { for (chan = 0; chan < 2; ++chan) - fmask_load_address[chan] = LLVMBuildAdd(ctx->builder, fmask_load_address[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), ""); + fmask_load_address[chan] = LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->nctx->frag_pos[chan], ctx->ac.i32, ""), ""); } - sample_index = adjust_sample_index_using_fmask(ctx, + sample_index = adjust_sample_index_using_fmask(&ctx->ac, fmask_load_address[0], fmask_load_address[1], fmask_load_address[2], sample_index, - get_sampler_desc(ctx, instr->variables[0], DESC_FMASK)); + get_sampler_desc(ctx, instr->variables[0], AC_DESC_FMASK, true, false)); } if (count == 1) { if (instr->src[0].ssa->num_components) - res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], ""); + res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); else res = src0; } else { @@ -3313,12 +3334,12 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, if (is_ms) count--; for (chan = 0; chan < count; ++chan) { - coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], ""); + coords[chan] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[chan], ""); } if (add_frag_pos) { for (chan = 0; chan < count; ++chan) - coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), ""); + coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->nctx->frag_pos[chan], ctx->ac.i32, ""), ""); } if (is_ms) { coords[count] = sample_index; @@ -3326,7 +3347,7 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, } if (count == 3) { - coords[3] = LLVMGetUndef(ctx->i32); + coords[3] = LLVMGetUndef(ctx->ac.i32); count = 4; } res = ac_build_gather_values(&ctx->ac, coords, count); @@ -3334,7 +3355,7 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, return res; } -static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx, +static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { LLVMValueRef params[7]; @@ -3342,39 +3363,42 @@ static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx, char intrinsic_name[64]; const nir_variable *var = instr->variables[0]->var; const struct glsl_type *type = var->type; + LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false); + LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false); + if(instr->variables[0]->deref.child) type = instr->variables[0]->deref.child->type; type = glsl_without_array(type); if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { - params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER); - params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), - LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ - params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */ - params[3] = ctx->i1false; /* glc */ - params[4] = ctx->i1false; /* slc */ - res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32, + params[0] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, false); + params[1] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), + ctx->ac.i32_0, ""); /* vindex */ + params[2] = ctx->ac.i32_0; /* voffset */ + params[3] = i1false; /* glc */ + params[4] = i1false; /* slc */ + res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->ac.v4f32, params, 5, 0); - res = trim_vector(ctx, res, instr->dest.ssa.num_components); + res = trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); res = to_integer(&ctx->ac, res); } else { bool is_da = glsl_sampler_type_is_array(type) || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; - LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false; - LLVMValueRef glc = ctx->i1false; - LLVMValueRef slc = ctx->i1false; + LLVMValueRef da = is_da ? i1true : i1false; + LLVMValueRef glc = i1false; + LLVMValueRef slc = i1false; params[0] = get_image_coords(ctx, instr); - params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); - params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */ + params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, true, false); + params[2] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ if (HAVE_LLVM <= 0x0309) { - params[3] = ctx->i1false; /* r128 */ + params[3] = i1false; /* r128 */ params[4] = da; params[5] = glc; params[6] = slc; } else { - LLVMValueRef lwe = ctx->i1false; + LLVMValueRef lwe = i1false; params[3] = glc; params[4] = slc; params[5] = lwe; @@ -3382,56 +3406,58 @@ static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx, } ac_get_image_intr_name("llvm.amdgcn.image.load", - ctx->v4f32, /* vdata */ + ctx->ac.v4f32, /* vdata */ LLVMTypeOf(params[0]), /* coords */ LLVMTypeOf(params[1]), /* rsrc */ intrinsic_name, sizeof(intrinsic_name)); - res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32, + res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.v4f32, params, 7, AC_FUNC_ATTR_READONLY); } return to_integer(&ctx->ac, res); } -static void visit_image_store(struct nir_to_llvm_context *ctx, +static void visit_image_store(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef params[8]; char intrinsic_name[64]; const nir_variable *var = instr->variables[0]->var; const struct glsl_type *type = glsl_without_array(var->type); - - if (ctx->stage == MESA_SHADER_FRAGMENT) - ctx->shader_info->fs.writes_memory = true; + LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false); + LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false); + LLVMValueRef glc = i1false; + bool force_glc = ctx->abi->chip_class == SI; + if (force_glc) + glc = i1true; if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { params[0] = to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */ - params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER); - params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), - LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ - params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */ - params[4] = ctx->i1false; /* glc */ - params[5] = ctx->i1false; /* slc */ - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt, + params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, true); + params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), + ctx->ac.i32_0, ""); /* vindex */ + params[3] = ctx->ac.i32_0; /* voffset */ + params[4] = glc; /* glc */ + params[5] = i1false; /* slc */ + ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, params, 6, 0); } else { bool is_da = glsl_sampler_type_is_array(type) || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; - LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false; - LLVMValueRef glc = ctx->i1false; - LLVMValueRef slc = ctx->i1false; + LLVMValueRef da = is_da ? i1true : i1false; + LLVMValueRef slc = i1false; params[0] = to_float(&ctx->ac, get_src(ctx, instr->src[2])); params[1] = get_image_coords(ctx, instr); /* coords */ - params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); - params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */ + params[2] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, true, true); + params[3] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ if (HAVE_LLVM <= 0x0309) { - params[4] = ctx->i1false; /* r128 */ + params[4] = i1false; /* r128 */ params[5] = da; params[6] = glc; params[7] = slc; } else { - LLVMValueRef lwe = ctx->i1false; + LLVMValueRef lwe = i1false; params[4] = glc; params[5] = slc; params[6] = lwe; @@ -3444,13 +3470,13 @@ static void visit_image_store(struct nir_to_llvm_context *ctx, LLVMTypeOf(params[2]), /* rsrc */ intrinsic_name, sizeof(intrinsic_name)); - ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt, + ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.voidt, params, 8, 0); } } -static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx, +static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { LLVMValueRef params[6]; @@ -3460,11 +3486,10 @@ static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx, const char *atomic_name; char intrinsic_name[41]; const struct glsl_type *type = glsl_without_array(var->type); + LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false); + LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false); MAYBE_UNUSED int length; - if (ctx->stage == MESA_SHADER_FRAGMENT) - ctx->shader_info->fs.writes_memory = true; - switch (instr->intrinsic) { case nir_intrinsic_image_atomic_add: atomic_name = "add"; @@ -3499,11 +3524,12 @@ static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx, params[param_count++] = get_src(ctx, instr->src[2]); if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { - params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER); - params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), - LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ - params[param_count++] = ctx->i32zero; /* voffset */ - params[param_count++] = ctx->i1false; /* slc */ + params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, + true, true); + params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), + ctx->ac.i32_0, ""); /* vindex */ + params[param_count++] = ctx->ac.i32_0; /* voffset */ + params[param_count++] = i1false; /* slc */ length = snprintf(intrinsic_name, sizeof(intrinsic_name), "llvm.amdgcn.buffer.atomic.%s", atomic_name); @@ -3514,10 +3540,11 @@ static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx, glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; LLVMValueRef coords = params[param_count++] = get_image_coords(ctx, instr); - params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); - params[param_count++] = ctx->i1false; /* r128 */ - params[param_count++] = da ? ctx->i1true : ctx->i1false; /* da */ - params[param_count++] = ctx->i1false; /* slc */ + params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, + true, true); + params[param_count++] = i1false; /* r128 */ + params[param_count++] = da ? i1true : i1false; /* da */ + params[param_count++] = i1false; /* slc */ build_int_type_name(LLVMTypeOf(coords), coords_type, sizeof(coords_type)); @@ -3527,10 +3554,10 @@ static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx, } assert(length < sizeof(intrinsic_name)); - return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0); + return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0); } -static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx, +static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { LLVMValueRef res; @@ -3542,25 +3569,27 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx, type = instr->variables[0]->deref.child->type; if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) - return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true); + return get_buffer_size(ctx, + get_sampler_desc(ctx, instr->variables[0], + AC_DESC_BUFFER, true, false), true); struct ac_image_args args = { 0 }; args.da = da; args.dmask = 0xf; - args.resource = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); + args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, true, false); args.opcode = ac_image_get_resinfo; - args.addr = ctx->i32zero; + args.addr = ctx->ac.i32_0; res = ac_build_image_opcode(&ctx->ac, &args); if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && glsl_sampler_type_is_array(type)) { - LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false); - LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false); - LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, ""); - z = LLVMBuildSDiv(ctx->builder, z, six, ""); - res = LLVMBuildInsertElement(ctx->builder, res, z, two, ""); + LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); + LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); + LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, ""); + z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); + res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, ""); } return res; } @@ -3598,10 +3627,9 @@ static void emit_discard_if(struct nir_to_llvm_context *ctx, const nir_intrinsic_instr *instr) { LLVMValueRef cond; - ctx->shader_info->fs.can_discard = true; cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, - get_src(ctx, instr->src[0]), + get_src(ctx->nir, instr->src[0]), ctx->i32zero, ""); cond = LLVMBuildSelect(ctx->builder, cond, @@ -3625,11 +3653,11 @@ static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx, const nir_intrinsic_instr *instr) { LLVMValueRef ptr, result; - LLVMValueRef src = get_src(ctx, instr->src[0]); - ptr = build_gep_for_deref(ctx, instr->variables[0]); + LLVMValueRef src = get_src(ctx->nir, instr->src[0]); + ptr = build_gep_for_deref(ctx->nir, instr->variables[0]); if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) { - LLVMValueRef src1 = get_src(ctx, instr->src[1]); + LLVMValueRef src1 = get_src(ctx->nir, instr->src[1]); result = LLVMBuildAtomicCmpXchg(ctx->builder, ptr, src, src1, LLVMAtomicOrderingSequentiallyConsistent, @@ -3749,7 +3777,7 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: location = INTERP_CENTER; - src0 = get_src(ctx, instr->src[0]); + src0 = get_src(ctx->nir, instr->src[0]); break; default: break; @@ -3866,7 +3894,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, /* loop num outputs */ idx = 0; for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef *out_ptr = &ctx->outputs[i * 4]; + LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4]; int length = 4; int slot = idx; int slot_inc = 1; @@ -3928,92 +3956,90 @@ visit_load_tess_coord(struct nir_to_llvm_context *ctx, LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, instr->num_components); return LLVMBuildBitCast(ctx->builder, result, - get_def_type(ctx, &instr->dest.ssa), ""); + get_def_type(ctx->nir, &instr->dest.ssa), ""); } -static void visit_intrinsic(struct nir_to_llvm_context *ctx, +static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef result = NULL; switch (instr->intrinsic) { case nir_intrinsic_load_work_group_id: { - result = ctx->workgroup_ids; + result = ctx->nctx->workgroup_ids; break; } case nir_intrinsic_load_base_vertex: { - result = ctx->base_vertex; + result = ctx->abi->base_vertex; break; } case nir_intrinsic_load_vertex_id_zero_base: { - result = ctx->vertex_id; + result = ctx->abi->vertex_id; break; } case nir_intrinsic_load_local_invocation_id: { - result = ctx->local_invocation_ids; + result = ctx->nctx->local_invocation_ids; break; } case nir_intrinsic_load_base_instance: - result = ctx->start_instance; + result = ctx->abi->start_instance; break; case nir_intrinsic_load_draw_id: - result = ctx->draw_index; + result = ctx->abi->draw_id; break; case nir_intrinsic_load_invocation_id: if (ctx->stage == MESA_SHADER_TESS_CTRL) - result = unpack_param(ctx, ctx->tcs_rel_ids, 8, 5); + result = unpack_param(ctx->nctx, ctx->nctx->tcs_rel_ids, 8, 5); else - result = ctx->gs_invocation_id; + result = ctx->nctx->gs_invocation_id; break; case nir_intrinsic_load_primitive_id: if (ctx->stage == MESA_SHADER_GEOMETRY) { - ctx->shader_info->gs.uses_prim_id = true; - result = ctx->gs_prim_id; + ctx->nctx->shader_info->gs.uses_prim_id = true; + result = ctx->nctx->gs_prim_id; } else if (ctx->stage == MESA_SHADER_TESS_CTRL) { - ctx->shader_info->tcs.uses_prim_id = true; - result = ctx->tcs_patch_id; + ctx->nctx->shader_info->tcs.uses_prim_id = true; + result = ctx->nctx->tcs_patch_id; } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { - ctx->shader_info->tcs.uses_prim_id = true; - result = ctx->tes_patch_id; + ctx->nctx->shader_info->tcs.uses_prim_id = true; + result = ctx->nctx->tes_patch_id; } else fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage); break; case nir_intrinsic_load_sample_id: - ctx->shader_info->fs.force_persample = true; - result = unpack_param(ctx, ctx->ancillary, 8, 4); + ctx->nctx->shader_info->fs.force_persample = true; + result = unpack_param(ctx->nctx, ctx->nctx->ancillary, 8, 4); break; case nir_intrinsic_load_sample_pos: - ctx->shader_info->fs.force_persample = true; - result = load_sample_pos(ctx); + ctx->nctx->shader_info->fs.force_persample = true; + result = load_sample_pos(ctx->nctx); break; case nir_intrinsic_load_sample_mask_in: - result = ctx->sample_coverage; + result = ctx->nctx->sample_coverage; break; case nir_intrinsic_load_front_face: - result = ctx->front_face; + result = ctx->nctx->front_face; break; case nir_intrinsic_load_instance_id: - result = ctx->instance_id; - ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3, - ctx->shader_info->vs.vgpr_comp_cnt); + result = ctx->abi->instance_id; break; case nir_intrinsic_load_num_work_groups: - result = ctx->num_work_groups; + result = ctx->nctx->num_work_groups; break; case nir_intrinsic_load_local_invocation_index: - result = visit_load_local_invocation_index(ctx); + result = visit_load_local_invocation_index(ctx->nctx); break; case nir_intrinsic_load_push_constant: - result = visit_load_push_constant(ctx, instr); + result = visit_load_push_constant(ctx->nctx, instr); break; case nir_intrinsic_vulkan_resource_index: - result = visit_vulkan_resource_index(ctx, instr); + result = visit_vulkan_resource_index(ctx->nctx, instr); break; case nir_intrinsic_store_ssbo: - visit_store_ssbo(ctx, instr); + visit_store_ssbo(ctx->nctx, instr); break; case nir_intrinsic_load_ssbo: - result = visit_load_buffer(ctx, instr); + result = visit_load_buffer(ctx->nctx, instr); break; case nir_intrinsic_ssbo_atomic_add: case nir_intrinsic_ssbo_atomic_imin: @@ -4025,7 +4051,7 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, case nir_intrinsic_ssbo_atomic_xor: case nir_intrinsic_ssbo_atomic_exchange: case nir_intrinsic_ssbo_atomic_comp_swap: - result = visit_atomic_ssbo(ctx, instr); + result = visit_atomic_ssbo(ctx->nctx, instr); break; case nir_intrinsic_load_ubo: result = visit_load_ubo_buffer(ctx, instr); @@ -4059,19 +4085,18 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, result = visit_image_size(ctx, instr); break; case nir_intrinsic_discard: - ctx->shader_info->fs.can_discard = true; ac_build_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp", - ctx->voidt, + LLVMVoidTypeInContext(ctx->ac.context), NULL, 0, AC_FUNC_ATTR_LEGACY); break; case nir_intrinsic_discard_if: - emit_discard_if(ctx, instr); + emit_discard_if(ctx->nctx, instr); break; case nir_intrinsic_memory_barrier: - emit_waitcnt(ctx, VM_CNT); + emit_waitcnt(ctx->nctx, VM_CNT); break; case nir_intrinsic_barrier: - emit_barrier(ctx); + emit_barrier(ctx->nctx); break; case nir_intrinsic_var_atomic_add: case nir_intrinsic_var_atomic_imin: @@ -4083,24 +4108,24 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, case nir_intrinsic_var_atomic_xor: case nir_intrinsic_var_atomic_exchange: case nir_intrinsic_var_atomic_comp_swap: - result = visit_var_atomic(ctx, instr); + result = visit_var_atomic(ctx->nctx, instr); break; case nir_intrinsic_interp_var_at_centroid: case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: - result = visit_interp(ctx, instr); + result = visit_interp(ctx->nctx, instr); break; case nir_intrinsic_emit_vertex: - visit_emit_vertex(ctx, instr); + visit_emit_vertex(ctx->nctx, instr); break; case nir_intrinsic_end_primitive: - visit_end_primitive(ctx, instr); + visit_end_primitive(ctx->nctx, instr); break; case nir_intrinsic_load_tess_coord: - result = visit_load_tess_coord(ctx, instr); + result = visit_load_tess_coord(ctx->nctx, instr); break; case nir_intrinsic_load_patch_vertices_in: - result = LLVMConstInt(ctx->i32, ctx->options->key.tcs.input_vertices, false); + result = LLVMConstInt(ctx->ac.i32, ctx->nctx->options->key.tcs.input_vertices, false); break; default: fprintf(stderr, "Unknown intrinsic: "); @@ -4113,42 +4138,47 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, } } -static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, - const nir_deref_var *deref, - enum desc_type desc_type) +static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, + unsigned descriptor_set, + unsigned base_index, + unsigned constant_index, + LLVMValueRef index, + enum ac_descriptor_type desc_type, + bool image, bool write) { - unsigned desc_set = deref->var->data.descriptor_set; - LLVMValueRef list = ctx->descriptor_sets[desc_set]; - struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout; - struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding; + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); + LLVMValueRef list = ctx->descriptor_sets[descriptor_set]; + struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout; + struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index; unsigned offset = binding->offset; unsigned stride = binding->size; unsigned type_size; LLVMBuilderRef builder = ctx->builder; LLVMTypeRef type; - LLVMValueRef index = NULL; - unsigned constant_index = 0; - assert(deref->var->data.binding < layout->binding_count); + assert(base_index < layout->binding_count); + + if (write && ctx->stage == MESA_SHADER_FRAGMENT) + ctx->shader_info->fs.writes_memory = true; switch (desc_type) { - case DESC_IMAGE: + case AC_DESC_IMAGE: type = ctx->v8i32; type_size = 32; break; - case DESC_FMASK: + case AC_DESC_FMASK: type = ctx->v8i32; offset += 32; type_size = 32; break; - case DESC_SAMPLER: + case AC_DESC_SAMPLER: type = ctx->v4i32; if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) offset += 64; type_size = 16; break; - case DESC_BUFFER: + case AC_DESC_BUFFER: type = ctx->v4i32; type_size = 16; break; @@ -4156,19 +4186,9 @@ static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, unreachable("invalid desc_type\n"); } - if (deref->deref.child) { - const nir_deref_array *child = - (const nir_deref_array *)deref->deref.child; - - assert(child->deref_array_type != nir_deref_array_type_wildcard); - offset += child->base_offset * stride; - if (child->deref_array_type == nir_deref_array_type_indirect) { - index = get_src(ctx, child->indirect); - } + offset += constant_index * stride; - constant_index = child->base_offset; - } - if (desc_type == DESC_SAMPLER && binding->immutable_samplers_offset && + if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset && (!index || binding->immutable_samplers_equal)) { if (binding->immutable_samplers_equal) constant_index = 0; @@ -4197,7 +4217,49 @@ static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, return ac_build_indexed_load_const(&ctx->ac, list, index); } -static void set_tex_fetch_args(struct nir_to_llvm_context *ctx, +static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, + const nir_deref_var *deref, + enum ac_descriptor_type desc_type, + bool image, bool write) +{ + LLVMValueRef index = NULL; + unsigned constant_index = 0; + const nir_deref *tail = &deref->deref; + + while (tail->child) { + const nir_deref_array *child = nir_deref_as_array(tail->child); + unsigned array_size = glsl_get_aoa_size(tail->child->type); + + if (!array_size) + array_size = 1; + + assert(child->deref_array_type != nir_deref_array_type_wildcard); + + if (child->deref_array_type == nir_deref_array_type_indirect) { + LLVMValueRef indirect = get_src(ctx, child->indirect); + + indirect = LLVMBuildMul(ctx->ac.builder, indirect, + LLVMConstInt(ctx->ac.i32, array_size, false), ""); + + if (!index) + index = indirect; + else + index = LLVMBuildAdd(ctx->ac.builder, index, indirect, ""); + } + + constant_index += child->base_offset * array_size; + + tail = &child->deref; + } + + return ctx->abi->load_sampler_desc(ctx->abi, + deref->var->data.descriptor_set, + deref->var->data.binding, + constant_index, index, + desc_type, image, write); +} + +static void set_tex_fetch_args(struct ac_llvm_context *ctx, struct ac_image_args *args, const nir_tex_instr *instr, nir_texop op, @@ -4215,7 +4277,7 @@ static void set_tex_fetch_args(struct nir_to_llvm_context *ctx, param[count++] = LLVMGetUndef(ctx->i32); if (count > 1) - args->addr = ac_build_gather_values(&ctx->ac, param, count); + args->addr = ac_build_gather_values(ctx, param, count); else args->addr = param[0]; @@ -4243,56 +4305,56 @@ static void set_tex_fetch_args(struct nir_to_llvm_context *ctx, * VI: * The ANISO_OVERRIDE sampler field enables this fix in TA. */ -static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx, +static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValueRef res, LLVMValueRef samp) { - LLVMBuilderRef builder = ctx->builder; + LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef img7, samp0; - if (ctx->options->chip_class >= VI) + if (ctx->abi->chip_class >= VI) return samp; img7 = LLVMBuildExtractElement(builder, res, - LLVMConstInt(ctx->i32, 7, 0), ""); + LLVMConstInt(ctx->ac.i32, 7, 0), ""); samp0 = LLVMBuildExtractElement(builder, samp, - LLVMConstInt(ctx->i32, 0, 0), ""); + LLVMConstInt(ctx->ac.i32, 0, 0), ""); samp0 = LLVMBuildAnd(builder, samp0, img7, ""); return LLVMBuildInsertElement(builder, samp, samp0, - LLVMConstInt(ctx->i32, 0, 0), ""); + LLVMConstInt(ctx->ac.i32, 0, 0), ""); } -static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx, +static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr, LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr) { if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) - *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER); + *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_BUFFER, false, false); else - *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE); + *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_IMAGE, false, false); if (samp_ptr) { if (instr->sampler) - *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER); + *samp_ptr = get_sampler_desc(ctx, instr->sampler, AC_DESC_SAMPLER, false, false); else - *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER); + *samp_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_SAMPLER, false, false); if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); } if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_samples_identical)) - *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK); + *fmask_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_FMASK, false, false); } -static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx, +static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, LLVMValueRef coord) { - coord = to_float(&ctx->ac, coord); - coord = ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0); - coord = to_integer(&ctx->ac, coord); + coord = to_float(ctx, coord); + coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0); + coord = to_integer(ctx, coord); return coord; } -static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) +static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) { LLVMValueRef result = NULL; struct ac_image_args args = { 0 }; @@ -4307,6 +4369,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) unsigned chan, count = 0; unsigned const_src = 0, num_deriv_comp = 0; bool lod_is_zero = false; + tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr); for (unsigned i = 0; i < instr->num_srcs; i++) { @@ -4361,48 +4424,48 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) if (instr->op == nir_texop_texture_samples) { LLVMValueRef res, samples, is_msaa; - res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, ""); - samples = LLVMBuildExtractElement(ctx->builder, res, - LLVMConstInt(ctx->i32, 3, false), ""); - is_msaa = LLVMBuildLShr(ctx->builder, samples, - LLVMConstInt(ctx->i32, 28, false), ""); - is_msaa = LLVMBuildAnd(ctx->builder, is_msaa, - LLVMConstInt(ctx->i32, 0xe, false), ""); - is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa, - LLVMConstInt(ctx->i32, 0xe, false), ""); - - samples = LLVMBuildLShr(ctx->builder, samples, - LLVMConstInt(ctx->i32, 16, false), ""); - samples = LLVMBuildAnd(ctx->builder, samples, - LLVMConstInt(ctx->i32, 0xf, false), ""); - samples = LLVMBuildShl(ctx->builder, ctx->i32one, + res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, ""); + samples = LLVMBuildExtractElement(ctx->ac.builder, res, + LLVMConstInt(ctx->ac.i32, 3, false), ""); + is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, + LLVMConstInt(ctx->ac.i32, 28, false), ""); + is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, + LLVMConstInt(ctx->ac.i32, 0xe, false), ""); + is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa, + LLVMConstInt(ctx->ac.i32, 0xe, false), ""); + + samples = LLVMBuildLShr(ctx->ac.builder, samples, + LLVMConstInt(ctx->ac.i32, 16, false), ""); + samples = LLVMBuildAnd(ctx->ac.builder, samples, + LLVMConstInt(ctx->ac.i32, 0xf, false), ""); + samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, samples, ""); - samples = LLVMBuildSelect(ctx->builder, is_msaa, samples, - ctx->i32one, ""); + samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, + ctx->ac.i32_1, ""); result = samples; goto write_result; } if (coord) for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = llvm_extract_elem(ctx, coord, chan); + coords[chan] = llvm_extract_elem(&ctx->ac, coord, chan); if (offsets && instr->op != nir_texop_txf) { LLVMValueRef offset[3], pack; for (chan = 0; chan < 3; ++chan) - offset[chan] = ctx->i32zero; + offset[chan] = ctx->ac.i32_0; args.offset = true; for (chan = 0; chan < get_llvm_num_components(offsets); chan++) { - offset[chan] = llvm_extract_elem(ctx, offsets, chan); - offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan], - LLVMConstInt(ctx->i32, 0x3f, false), ""); + offset[chan] = llvm_extract_elem(&ctx->ac, offsets, chan); + offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], + LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); if (chan) - offset[chan] = LLVMBuildShl(ctx->builder, offset[chan], - LLVMConstInt(ctx->i32, chan * 8, false), ""); + offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], + LLVMConstInt(ctx->ac.i32, chan * 8, false), ""); } - pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], ""); - pack = LLVMBuildOr(ctx->builder, pack, offset[2], ""); + pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); + pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); address[count++] = pack; } @@ -4413,7 +4476,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) /* Pack depth comparison value */ if (instr->is_shadow && comparator) { - address[count++] = llvm_extract_elem(ctx, comparator, 0); + address[count++] = llvm_extract_elem(&ctx->ac, comparator, 0); } /* pack derivatives */ @@ -4433,18 +4496,18 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) } for (unsigned i = 0; i < num_deriv_comp; i++) { - derivs[i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddx, i)); - derivs[num_deriv_comp + i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddy, i)); + derivs[i] = to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddx, i)); + derivs[num_deriv_comp + i] = to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddy, i)); } } if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { if (instr->is_array && instr->op != nir_texop_lod) - coords[3] = apply_round_slice(ctx, coords[3]); + coords[3] = apply_round_slice(&ctx->ac, coords[3]); for (chan = 0; chan < instr->coord_components; chan++) coords[chan] = to_float(&ctx->ac, coords[chan]); if (instr->coord_components == 3) - coords[3] = LLVMGetUndef(ctx->f32); + coords[3] = LLVMGetUndef(ctx->ac.f32); ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array, coords, derivs); @@ -4462,7 +4525,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) address[count++] = coords[0]; if (instr->coord_components > 1) { if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) { - coords[1] = apply_round_slice(ctx, coords[1]); + coords[1] = apply_round_slice(&ctx->ac, coords[1]); } address[count++] = coords[1]; } @@ -4471,7 +4534,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE && instr->op != nir_texop_txf) { - coords[2] = apply_round_slice(ctx, coords[2]); + coords[2] = apply_round_slice(&ctx->ac, coords[2]); } address[count++] = coords[2]; } @@ -4488,12 +4551,12 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) if (lod) address[count++] = lod; else - address[count++] = ctx->i32zero; + address[count++] = ctx->ac.i32_0; } for (chan = 0; chan < count; chan++) { - address[chan] = LLVMBuildBitCast(ctx->builder, - address[chan], ctx->i32, ""); + address[chan] = LLVMBuildBitCast(ctx->ac.builder, + address[chan], ctx->ac.i32, ""); } if (instr->op == nir_texop_samples_identical) { @@ -4503,24 +4566,24 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) memcpy(txf_address, address, sizeof(txf_address)); if (!instr->is_array) - txf_address[2] = ctx->i32zero; - txf_address[3] = ctx->i32zero; + txf_address[2] = ctx->ac.i32_0; + txf_address[3] = ctx->ac.i32_0; - set_tex_fetch_args(ctx, &txf_args, instr, nir_texop_txf, + set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf, fmask_ptr, NULL, txf_address, txf_count, 0xf); result = build_tex_intrinsic(ctx, instr, false, &txf_args); - result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, ""); - result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->i32zero); + result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); + result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0); goto write_result; } if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS && instr->op != nir_texop_txs) { unsigned sample_chan = instr->is_array ? 3 : 2; - address[sample_chan] = adjust_sample_index_using_fmask(ctx, + address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac, address[0], address[1], instr->is_array ? address[2] : NULL, @@ -4535,13 +4598,13 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) assert(const_offset); num_offsets = MIN2(num_offsets, instr->coord_components); if (num_offsets > 2) - address[2] = LLVMBuildAdd(ctx->builder, - address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), ""); + address[2] = LLVMBuildAdd(ctx->ac.builder, + address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), ""); if (num_offsets > 1) - address[1] = LLVMBuildAdd(ctx->builder, - address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), ""); - address[0] = LLVMBuildAdd(ctx->builder, - address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), ""); + address[1] = LLVMBuildAdd(ctx->ac.builder, + address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), ""); + address[0] = LLVMBuildAdd(ctx->ac.builder, + address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), ""); } @@ -4552,25 +4615,27 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) else dmask = 1 << instr->component; } - set_tex_fetch_args(ctx, &args, instr, instr->op, + set_tex_fetch_args(&ctx->ac, &args, instr, instr->op, res_ptr, samp_ptr, address, count, dmask); result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args); if (instr->op == nir_texop_query_levels) - result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), ""); - else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4) - result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, ""); + result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), ""); + else if (instr->is_shadow && instr->is_new_style_shadow && + instr->op != nir_texop_txs && instr->op != nir_texop_lod && + instr->op != nir_texop_tg4) + result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); else if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->is_array) { - LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false); - LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false); - LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, ""); - z = LLVMBuildSDiv(ctx->builder, z, six, ""); - result = LLVMBuildInsertElement(ctx->builder, result, z, two, ""); + LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); + LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); + LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, ""); + z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); + result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, ""); } else if (instr->dest.ssa.num_components != 4) - result = trim_vector(ctx, result, instr->dest.ssa.num_components); + result = trim_vector(&ctx->ac, result, instr->dest.ssa.num_components); write_result: if (result) { @@ -4581,16 +4646,16 @@ write_result: } -static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr) +static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr) { LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa); - LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, ""); + LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, ""); _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); _mesa_hash_table_insert(ctx->phis, instr, result); } -static void visit_post_phi(struct nir_to_llvm_context *ctx, +static void visit_post_phi(struct ac_nir_context *ctx, nir_phi_instr *instr, LLVMValueRef llvm_phi) { @@ -4602,7 +4667,7 @@ static void visit_post_phi(struct nir_to_llvm_context *ctx, } } -static void phi_post_pass(struct nir_to_llvm_context *ctx) +static void phi_post_pass(struct ac_nir_context *ctx) { struct hash_entry *entry; hash_table_foreach(ctx->phis, entry) { @@ -4612,31 +4677,31 @@ static void phi_post_pass(struct nir_to_llvm_context *ctx) } -static void visit_ssa_undef(struct nir_to_llvm_context *ctx, +static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_ssa_undef_instr *instr) { unsigned num_components = instr->def.num_components; LLVMValueRef undef; if (num_components == 1) - undef = LLVMGetUndef(ctx->i32); + undef = LLVMGetUndef(ctx->ac.i32); else { - undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components)); + undef = LLVMGetUndef(LLVMVectorType(ctx->ac.i32, num_components)); } _mesa_hash_table_insert(ctx->defs, &instr->def, undef); } -static void visit_jump(struct nir_to_llvm_context *ctx, +static void visit_jump(struct ac_nir_context *ctx, const nir_jump_instr *instr) { switch (instr->type) { case nir_jump_break: - LLVMBuildBr(ctx->builder, ctx->break_block); - LLVMClearInsertionPosition(ctx->builder); + LLVMBuildBr(ctx->ac.builder, ctx->break_block); + LLVMClearInsertionPosition(ctx->ac.builder); break; case nir_jump_continue: - LLVMBuildBr(ctx->builder, ctx->continue_block); - LLVMClearInsertionPosition(ctx->builder); + LLVMBuildBr(ctx->ac.builder, ctx->continue_block); + LLVMClearInsertionPosition(ctx->ac.builder); break; default: fprintf(stderr, "Unknown NIR jump instr: "); @@ -4646,12 +4711,12 @@ static void visit_jump(struct nir_to_llvm_context *ctx, } } -static void visit_cf_list(struct nir_to_llvm_context *ctx, +static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list); -static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block) +static void visit_block(struct ac_nir_context *ctx, nir_block *block) { - LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder); + LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder); nir_foreach_instr(instr, block) { switch (instr->type) { @@ -4687,61 +4752,63 @@ static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block) _mesa_hash_table_insert(ctx->defs, block, llvm_block); } -static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt) +static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt) { LLVMValueRef value = get_src(ctx, if_stmt->condition); + LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder)); LLVMBasicBlockRef merge_block = - LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); + LLVMAppendBasicBlockInContext(ctx->ac.context, fn, ""); LLVMBasicBlockRef if_block = - LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); + LLVMAppendBasicBlockInContext(ctx->ac.context, fn, ""); LLVMBasicBlockRef else_block = merge_block; if (!exec_list_is_empty(&if_stmt->else_list)) else_block = LLVMAppendBasicBlockInContext( - ctx->context, ctx->main_function, ""); + ctx->ac.context, fn, ""); - LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value, - LLVMConstInt(ctx->i32, 0, false), ""); - LLVMBuildCondBr(ctx->builder, cond, if_block, else_block); + LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, value, + LLVMConstInt(ctx->ac.i32, 0, false), ""); + LLVMBuildCondBr(ctx->ac.builder, cond, if_block, else_block); - LLVMPositionBuilderAtEnd(ctx->builder, if_block); + LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block); visit_cf_list(ctx, &if_stmt->then_list); - if (LLVMGetInsertBlock(ctx->builder)) - LLVMBuildBr(ctx->builder, merge_block); + if (LLVMGetInsertBlock(ctx->ac.builder)) + LLVMBuildBr(ctx->ac.builder, merge_block); if (!exec_list_is_empty(&if_stmt->else_list)) { - LLVMPositionBuilderAtEnd(ctx->builder, else_block); + LLVMPositionBuilderAtEnd(ctx->ac.builder, else_block); visit_cf_list(ctx, &if_stmt->else_list); - if (LLVMGetInsertBlock(ctx->builder)) - LLVMBuildBr(ctx->builder, merge_block); + if (LLVMGetInsertBlock(ctx->ac.builder)) + LLVMBuildBr(ctx->ac.builder, merge_block); } - LLVMPositionBuilderAtEnd(ctx->builder, merge_block); + LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block); } -static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop) +static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop) { + LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder)); LLVMBasicBlockRef continue_parent = ctx->continue_block; LLVMBasicBlockRef break_parent = ctx->break_block; ctx->continue_block = - LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); + LLVMAppendBasicBlockInContext(ctx->ac.context, fn, ""); ctx->break_block = - LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); + LLVMAppendBasicBlockInContext(ctx->ac.context, fn, ""); - LLVMBuildBr(ctx->builder, ctx->continue_block); - LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block); + LLVMBuildBr(ctx->ac.builder, ctx->continue_block); + LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->continue_block); visit_cf_list(ctx, &loop->body); - if (LLVMGetInsertBlock(ctx->builder)) - LLVMBuildBr(ctx->builder, ctx->continue_block); - LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block); + if (LLVMGetInsertBlock(ctx->ac.builder)) + LLVMBuildBr(ctx->ac.builder, ctx->continue_block); + LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->break_block); ctx->continue_block = continue_parent; ctx->break_block = break_parent; } -static void visit_cf_list(struct nir_to_llvm_context *ctx, +static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list) { foreach_list_typed(nir_cf_node, node, node, list) @@ -4781,13 +4848,13 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx, variable->data.driver_location = idx * 4; if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) { - buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id, - ctx->start_instance, ""); + buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id, + ctx->abi.start_instance, ""); ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3, ctx->shader_info->vs.vgpr_comp_cnt); } else - buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id, - ctx->base_vertex, ""); + buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id, + ctx->abi.base_vertex, ""); for (unsigned i = 0; i < attrib_count; ++i, ++idx) { t_offset = LLVMConstInt(ctx->i32, index + i, false); @@ -4945,16 +5012,16 @@ handle_fs_inputs_pre(struct nir_to_llvm_context *ctx, } static LLVMValueRef -ac_build_alloca(struct nir_to_llvm_context *ctx, +ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name) { - LLVMBuilderRef builder = ctx->builder; + LLVMBuilderRef builder = ac->builder; LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); LLVMValueRef function = LLVMGetBasicBlockParent(current_block); LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); - LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context); + LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context); LLVMValueRef res; if (first_instr) { @@ -4971,22 +5038,23 @@ ac_build_alloca(struct nir_to_llvm_context *ctx, return res; } -static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx, +static LLVMValueRef si_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name) { - LLVMValueRef ptr = ac_build_alloca(ctx, type, name); - LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr); + LLVMValueRef ptr = ac_build_alloca(ac, type, name); + LLVMBuildStore(ac->builder, LLVMGetUndef(type), ptr); return ptr; } static void -handle_shader_output_decl(struct nir_to_llvm_context *ctx, - struct nir_variable *variable) +scan_shader_output_decl(struct nir_to_llvm_context *ctx, + struct nir_variable *variable) { int idx = variable->data.location + variable->data.index; unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); uint64_t mask_attribs; + variable->data.driver_location = idx * 4; /* tess ctrl has it's own load/store paths for outputs */ @@ -5016,13 +5084,42 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx, } } + ctx->output_mask |= mask_attribs; +} + +static void +handle_shader_output_decl(struct ac_nir_context *ctx, + struct nir_shader *nir, + struct nir_variable *variable) +{ + unsigned output_loc = variable->data.driver_location / 4; + unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); + + /* tess ctrl has it's own load/store paths for outputs */ + if (ctx->stage == MESA_SHADER_TESS_CTRL) + return; + + if (ctx->stage == MESA_SHADER_VERTEX || + ctx->stage == MESA_SHADER_TESS_EVAL || + ctx->stage == MESA_SHADER_GEOMETRY) { + int idx = variable->data.location + variable->data.index; + if (idx == VARYING_SLOT_CLIP_DIST0) { + int length = nir->info.clip_distance_array_size + + nir->info.cull_distance_array_size; + + if (length > 4) + attrib_count = 2; + else + attrib_count = 1; + } + } + for (unsigned i = 0; i < attrib_count; ++i) { for (unsigned chan = 0; chan < 4; chan++) { - ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] = - si_build_alloca_undef(ctx, ctx->f32, ""); + ctx->outputs[radeon_llvm_reg_index_soa(output_loc + i, chan)] = + si_build_alloca_undef(&ctx->ac, ctx->ac.f32, ""); } } - ctx->output_mask |= mask_attribs; } static LLVMTypeRef @@ -5088,7 +5185,7 @@ glsl_to_llvm_type(struct nir_to_llvm_context *ctx, } static void -setup_locals(struct nir_to_llvm_context *ctx, +setup_locals(struct ac_nir_context *ctx, struct nir_function *func) { int i, j; @@ -5105,19 +5202,19 @@ setup_locals(struct nir_to_llvm_context *ctx, for (i = 0; i < ctx->num_locals; i++) { for (j = 0; j < 4; j++) { ctx->locals[i * 4 + j] = - si_build_alloca_undef(ctx, ctx->f32, "temp"); + si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp"); } } } static void -setup_shared(struct nir_to_llvm_context *ctx, +setup_shared(struct ac_nir_context *ctx, struct nir_shader *nir) { nir_foreach_variable(variable, &nir->shared) { LLVMValueRef shared = LLVMAddGlobalInAddressSpace( - ctx->module, glsl_to_llvm_type(ctx, variable->type), + ctx->ac.module, glsl_to_llvm_type(ctx->nctx, variable->type), variable->name ? variable->name : "", LOCAL_ADDR_SPACE); _mesa_hash_table_insert(ctx->vars, variable, shared); @@ -5322,7 +5419,7 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, i = VARYING_SLOT_CLIP_DIST0; for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++) slots[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++) slots[i] = LLVMGetUndef(ctx->f32); @@ -5345,26 +5442,26 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) { for (unsigned j = 0; j < 4; j++) pos_values[j] = LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], ""); + ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], ""); } si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]); if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) { outinfo->writes_pointsize = true; psize_value = LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], ""); + ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], ""); } if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) { outinfo->writes_layer = true; layer_value = LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], ""); + ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], ""); } if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) { outinfo->writes_viewport_index = true; viewport_index_value = LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], ""); + ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], ""); } uint32_t mask = ((outinfo->writes_pointsize == true ? 1 : 0) | @@ -5412,7 +5509,7 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, for (unsigned j = 0; j < 4; j++) values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); if (i == VARYING_SLOT_LAYER) { target = V_008DFC_SQ_EXP_PARAM + param_count; @@ -5468,7 +5565,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx, int j; uint64_t max_output_written = 0; for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef *out_ptr = &ctx->outputs[i * 4]; + LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4]; int param_index; int length = 4; @@ -5506,7 +5603,7 @@ handle_ls_outputs_post(struct nir_to_llvm_context *ctx) vertex_dw_stride, ""); for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef *out_ptr = &ctx->outputs[i * 4]; + LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4]; int length = 4; if (!(ctx->output_mask & (1ull << i))) @@ -5815,10 +5912,11 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx, args.enabled_channels |= 0x4; } - /* SI (except OLAND) has a bug that it only looks + /* SI (except OLAND and HAINAN) has a bug that it only looks * at the X writemask component. */ if (ctx->options->chip_class == SI && - ctx->options->family != CHIP_OLAND) + ctx->options->family != CHIP_OLAND && + ctx->options->family != CHIP_HAINAN) args.enabled_channels |= 0x1; ac_build_export(&ctx->ac, &args); @@ -5840,20 +5938,20 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx) if (i == FRAG_RESULT_DEPTH) { ctx->shader_info->fs.writes_z = true; depth = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); } else if (i == FRAG_RESULT_STENCIL) { ctx->shader_info->fs.writes_stencil = true; stencil = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); } else if (i == FRAG_RESULT_SAMPLE_MASK) { ctx->shader_info->fs.writes_sample_mask = true; samplemask = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); } else { bool last = false; for (unsigned j = 0; j < 4; j++) values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask) last = ctx->output_mask <= ((1ull << (i + 1)) - 1); @@ -5883,8 +5981,11 @@ emit_gs_epilogue(struct nir_to_llvm_context *ctx) } static void -handle_shader_outputs_post(struct nir_to_llvm_context *ctx) +handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, + LLVMValueRef *addrs) { + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); + switch (ctx->stage) { case MESA_SHADER_VERTEX: if (ctx->options->key.vs.as_ls) @@ -6027,6 +6128,55 @@ ac_nir_get_max_workgroup_size(enum chip_class chip_class, return max_workgroup_size; } +void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, + struct nir_shader *nir, struct nir_to_llvm_context *nctx) +{ + struct ac_nir_context ctx = {}; + struct nir_function *func; + + ctx.ac = *ac; + ctx.abi = abi; + + ctx.nctx = nctx; + if (nctx) + nctx->nir = &ctx; + + ctx.stage = nir->stage; + + ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); + + nir_foreach_variable(variable, &nir->outputs) + handle_shader_output_decl(&ctx, nir, variable); + + ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + func = (struct nir_function *)exec_list_get_head(&nir->functions); + + setup_locals(&ctx, func); + + if (nir->stage == MESA_SHADER_COMPUTE) + setup_shared(&ctx, nir); + + visit_cf_list(&ctx, &func->impl->body); + phi_post_pass(&ctx); + + ctx.abi->emit_outputs(ctx.abi, RADEON_LLVM_MAX_OUTPUTS, + ctx.outputs); + + free(ctx.locals); + ralloc_free(ctx.defs); + ralloc_free(ctx.phis); + ralloc_free(ctx.vars); + + if (nctx) + nctx->nir = NULL; +} + static LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, struct nir_shader *nir, @@ -6034,7 +6184,6 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, const struct ac_nir_compiler_options *options) { struct nir_to_llvm_context ctx = {0}; - struct nir_function *func; unsigned i; ctx.options = options; ctx.shader_info = shader_info; @@ -6049,7 +6198,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, memset(shader_info, 0, sizeof(*shader_info)); ac_nir_shader_info_pass(nir, options, &shader_info->info); - + LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--"); LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); @@ -6073,54 +6222,46 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, create_function(&ctx); if (nir->stage == MESA_SHADER_GEOMETRY) { - ctx.gs_next_vertex = ac_build_alloca(&ctx, ctx.i32, "gs_next_vertex"); + ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.i32, "gs_next_vertex"); ctx.gs_max_out_vertices = nir->info.gs.vertices_out; } else if (nir->stage == MESA_SHADER_TESS_EVAL) { ctx.tes_primitive_mode = nir->info.tess.primitive_mode; + } else if (nir->stage == MESA_SHADER_VERTEX) { + if (shader_info->info.vs.needs_instance_id) { + ctx.shader_info->vs.vgpr_comp_cnt = + MAX2(3, ctx.shader_info->vs.vgpr_comp_cnt); + } + } else if (nir->stage == MESA_SHADER_FRAGMENT) { + shader_info->fs.can_discard = nir->info.fs.uses_discard; } ac_setup_rings(&ctx); + ctx.num_output_clips = nir->info.clip_distance_array_size; + ctx.num_output_culls = nir->info.cull_distance_array_size; + nir_foreach_variable(variable, &nir->inputs) handle_shader_input_decl(&ctx, variable); if (nir->stage == MESA_SHADER_FRAGMENT) handle_fs_inputs_pre(&ctx, nir); - ctx.num_output_clips = nir->info.clip_distance_array_size; - ctx.num_output_culls = nir->info.cull_distance_array_size; + ctx.abi.chip_class = options->chip_class; + ctx.abi.inputs = &ctx.inputs[0]; + ctx.abi.emit_outputs = handle_shader_outputs_post; + ctx.abi.load_sampler_desc = radv_get_sampler_desc; nir_foreach_variable(variable, &nir->outputs) - handle_shader_output_decl(&ctx, variable); + scan_shader_output_decl(&ctx, variable); - ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + ac_nir_translate(&ctx.ac, &ctx.abi, nir, &ctx); - func = (struct nir_function *)exec_list_get_head(&nir->functions); - - setup_locals(&ctx, func); - - if (nir->stage == MESA_SHADER_COMPUTE) - setup_shared(&ctx, nir); - - visit_cf_list(&ctx, &func->impl->body); - phi_post_pass(&ctx); - - handle_shader_outputs_post(&ctx); LLVMBuildRetVoid(ctx.builder); ac_llvm_finalize_module(&ctx); ac_nir_eliminate_const_vs_outputs(&ctx); - free(ctx.locals); - ralloc_free(ctx.defs); - ralloc_free(ctx.phis); - ralloc_free(ctx.vars); if (nir->stage == MESA_SHADER_GEOMETRY) { unsigned addclip = ctx.num_output_clips + ctx.num_output_culls > 4; @@ -6317,7 +6458,7 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) { LLVMValueRef args[9]; args[0] = ctx->gsvs_ring; - args[1] = LLVMBuildMul(ctx->builder, ctx->vertex_id, LLVMConstInt(ctx->i32, 4, false), ""); + args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, LLVMConstInt(ctx->i32, 4, false), ""); args[3] = ctx->i32zero; args[4] = ctx->i32one; /* OFFEN */ args[5] = ctx->i32zero; /* IDXEN */ @@ -6354,7 +6495,7 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) AC_FUNC_ATTR_LEGACY); LLVMBuildStore(ctx->builder, - to_float(&ctx->ac, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]); + to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]); } idx += slot_inc; } @@ -6394,11 +6535,22 @@ void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, ctx.num_output_clips = geom_shader->info.clip_distance_array_size; ctx.num_output_culls = geom_shader->info.cull_distance_array_size; - nir_foreach_variable(variable, &geom_shader->outputs) - handle_shader_output_decl(&ctx, variable); + struct ac_nir_context nir_ctx = {}; + nir_ctx.ac = ctx.ac; + nir_ctx.abi = &ctx.abi; + + nir_ctx.nctx = &ctx; + ctx.nir = &nir_ctx; + + nir_foreach_variable(variable, &geom_shader->outputs) { + scan_shader_output_decl(&ctx, variable); + handle_shader_output_decl(&nir_ctx, geom_shader, variable); + } ac_gs_copy_shader_emit(&ctx); + ctx.nir = NULL; + LLVMBuildRetVoid(ctx.builder); ac_llvm_finalize_module(&ctx);