X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Famd%2Fcommon%2Fac_nir_to_llvm.c;h=6467ed66ae5454a817d51009fb6603290b538a8d;hb=e6378962ce43727056756a373f5001da041b160e;hp=3b705b4b40dfc7ba5773a570d342b5517afbe320;hpb=5081fd398e52c365f326870bee9155a31e4d99ec;p=mesa.git diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 3b705b4b40d..6467ed66ae5 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -111,10 +111,7 @@ struct nir_to_llvm_context { LLVMValueRef oc_lds; LLVMValueRef merged_wave_info; LLVMValueRef tess_factor_offset; - LLVMValueRef tcs_patch_id; - LLVMValueRef tcs_rel_ids; LLVMValueRef tes_rel_patch_id; - LLVMValueRef tes_patch_id; LLVMValueRef tes_u; LLVMValueRef tes_v; @@ -404,7 +401,7 @@ static LLVMValueRef get_rel_patch_id(struct nir_to_llvm_context *ctx) { switch (ctx->stage) { case MESA_SHADER_TESS_CTRL: - return unpack_param(&ctx->ac, ctx->tcs_rel_ids, 0, 8); + return unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8); case MESA_SHADER_TESS_EVAL: return ctx->tes_rel_patch_id; break; @@ -504,19 +501,26 @@ get_tcs_out_current_patch_data_offset(struct nir_to_llvm_context *ctx) ""); } -static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs) +static void +set_loc(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs, + uint32_t indirect_offset) { ud_info->sgpr_idx = *sgpr_idx; ud_info->num_sgprs = num_sgprs; - ud_info->indirect = false; - ud_info->indirect_offset = 0; + ud_info->indirect = indirect_offset > 0; + ud_info->indirect_offset = indirect_offset; *sgpr_idx += num_sgprs; } -static void set_userdata_location_shader(struct nir_to_llvm_context *ctx, - int idx, uint8_t *sgpr_idx, uint8_t num_sgprs) +static void +set_loc_shader(struct nir_to_llvm_context *ctx, int idx, uint8_t *sgpr_idx, + uint8_t num_sgprs) { - set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs); + struct ac_userdata_info *ud_info = + &ctx->shader_info->user_sgprs_locs.shader_data[idx]; + assert(ud_info); + + set_loc(ud_info, sgpr_idx, num_sgprs, 0); } static void @@ -527,10 +531,7 @@ set_loc_desc(struct nir_to_llvm_context *ctx, int idx, uint8_t *sgpr_idx, &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx]; assert(ud_info); - ud_info->sgpr_idx = *sgpr_idx; - ud_info->num_sgprs = 2; - ud_info->indirect = indirect_offset > 0; - ud_info->indirect_offset = indirect_offset; + set_loc(ud_info, sgpr_idx, 2, indirect_offset); } struct user_sgpr_info { @@ -540,19 +541,20 @@ struct user_sgpr_info { }; static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, + gl_shader_stage stage, struct user_sgpr_info *user_sgpr_info) { memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ - if (ctx->stage == MESA_SHADER_GEOMETRY || - ctx->stage == MESA_SHADER_VERTEX || - ctx->stage == MESA_SHADER_TESS_CTRL || - ctx->stage == MESA_SHADER_TESS_EVAL || + if (stage == MESA_SHADER_GEOMETRY || + stage == MESA_SHADER_VERTEX || + stage == MESA_SHADER_TESS_CTRL || + stage == MESA_SHADER_TESS_EVAL || ctx->is_gs_copy_shader) user_sgpr_info->need_ring_offsets = true; - if (ctx->stage == MESA_SHADER_FRAGMENT && + if (stage == MESA_SHADER_FRAGMENT && ctx->shader_info->info.ps.needs_sample_positions) user_sgpr_info->need_ring_offsets = true; @@ -561,7 +563,8 @@ static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, 
user_sgpr_info->sgpr_count += 2; } - switch (ctx->stage) { + /* FIXME: fix the number of user sgprs for merged shaders on GFX9 */ + switch (stage) { case MESA_SHADER_COMPUTE: if (ctx->shader_info->info.cs.uses_grid_size) user_sgpr_info->sgpr_count += 3; @@ -594,10 +597,12 @@ static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, break; } - if (ctx->shader_info->info.needs_push_constants) + if (ctx->shader_info->info.loads_push_constants) user_sgpr_info->sgpr_count += 2; - uint32_t remaining_sgprs = 16 - user_sgpr_info->sgpr_count; + uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16; + uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count; + if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) { user_sgpr_info->sgpr_count += 2; user_sgpr_info->indirect_all_descriptor_sets = true; @@ -635,7 +640,7 @@ declare_global_input_sgprs(struct nir_to_llvm_context *ctx, add_array_arg(args, const_array(type, 32), desc_sets); } - if (ctx->shader_info->info.needs_push_constants) { + if (ctx->shader_info->info.loads_push_constants) { /* 1 for push constants and dynamic descriptors */ add_array_arg(args, type, &ctx->push_constants); } @@ -668,9 +673,14 @@ declare_vs_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args) { add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.vertex_id); if (!ctx->is_gs_copy_shader) { - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->rel_auto_id); - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id); - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); + if (ctx->options->key.vs.as_ls) { + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->rel_auto_id); + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); + } else { + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id); + } + add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* unused */ } } @@ -680,7 +690,7 @@ declare_tes_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args) add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_u); add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_v); add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->tes_rel_patch_id); - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->tes_patch_id); + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id); } static void @@ -704,9 +714,8 @@ set_global_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage stage, ctx->descriptor_sets[i] = NULL; } } else { - set_userdata_location_shader(ctx, - AC_UD_INDIRECT_DESCRIPTOR_SETS, - user_sgpr_idx, 2); + set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, + user_sgpr_idx, 2); for (unsigned i = 0; i < num_sets; ++i) { if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) { @@ -722,9 +731,8 @@ set_global_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage stage, ctx->shader_info->need_indirect_descriptor_sets = true; } - if (ctx->shader_info->info.needs_push_constants) { - set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, - user_sgpr_idx, 2); + if (ctx->shader_info->info.loads_push_constants) { + set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2); } } @@ -738,18 +746,16 @@ set_vs_specific_input_locs(struct nir_to_llvm_context *ctx, (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { if (ctx->shader_info->info.vs.has_vertex_buffers) { - set_userdata_location_shader(ctx, - AC_UD_VS_VERTEX_BUFFERS, - user_sgpr_idx, 2); + set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, + user_sgpr_idx, 
2); } unsigned vs_num = 2; if (ctx->shader_info->info.vs.needs_draw_id) vs_num++; - set_userdata_location_shader(ctx, - AC_UD_VS_BASE_VERTEX_START_INSTANCE, - user_sgpr_idx, vs_num); + set_loc_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, + user_sgpr_idx, vs_num); } } @@ -763,7 +769,7 @@ static void create_function(struct nir_to_llvm_context *ctx, struct arg_info args = {}; LLVMValueRef desc_sets; - allocate_user_sgprs(ctx, &user_sgpr_info); + allocate_user_sgprs(ctx, stage, &user_sgpr_info); if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) { add_arg(&args, ARG_SGPR, const_array(ctx->ac.v4i32, 16), @@ -850,9 +856,9 @@ static void create_function(struct nir_to_llvm_context *ctx, &ctx->view_index); add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->tcs_patch_id); + &ctx->abi.tcs_patch_id); add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->tcs_rel_ids); + &ctx->abi.tcs_rel_ids); declare_vs_input_vgprs(ctx, &args); } else { @@ -878,9 +884,9 @@ static void create_function(struct nir_to_llvm_context *ctx, add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tess_factor_offset); add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->tcs_patch_id); + &ctx->abi.tcs_patch_id); add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->tcs_rel_ids); + &ctx->abi.tcs_rel_ids); } break; case MESA_SHADER_TESS_EVAL: @@ -1042,7 +1048,8 @@ static void create_function(struct nir_to_llvm_context *ctx, user_sgpr_idx = 0; if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) { - set_userdata_location_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx, 2); + set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, + &user_sgpr_idx, 2); if (ctx->options->supports_spill) { ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", LLVMPointerType(ctx->ac.i8, CONST_ADDR_SPACE), @@ -1063,17 +1070,18 @@ static void create_function(struct nir_to_llvm_context *ctx, switch (stage) { case MESA_SHADER_COMPUTE: if (ctx->shader_info->info.cs.uses_grid_size) { - set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, - &user_sgpr_idx, 3); + set_loc_shader(ctx, AC_UD_CS_GRID_SIZE, + &user_sgpr_idx, 3); } break; case MESA_SHADER_VERTEX: set_vs_specific_input_locs(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx); if (ctx->view_index) - set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); if (ctx->options->key.vs.as_ls) { - set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1); + set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, + &user_sgpr_idx, 1); } if (ctx->options->key.vs.as_ls) ac_declare_lds_as_pointer(&ctx->ac); @@ -1082,16 +1090,17 @@ static void create_function(struct nir_to_llvm_context *ctx, set_vs_specific_input_locs(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx); if (has_previous_stage) - set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1); - set_userdata_location_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4); + set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, + &user_sgpr_idx, 1); + set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4); if (ctx->view_index) - set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); ac_declare_lds_as_pointer(&ctx->ac); break; case MESA_SHADER_TESS_EVAL: - set_userdata_location_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1); + set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1); if 
(ctx->view_index) - set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); break; case MESA_SHADER_GEOMETRY: if (has_previous_stage) { @@ -1101,17 +1110,20 @@ static void create_function(struct nir_to_llvm_context *ctx, previous_stage, &user_sgpr_idx); else - set_userdata_location_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1); + set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, + &user_sgpr_idx, 1); } - set_userdata_location_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, &user_sgpr_idx, 2); + set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, + &user_sgpr_idx, 2); if (ctx->view_index) - set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); if (has_previous_stage) ac_declare_lds_as_pointer(&ctx->ac); break; case MESA_SHADER_FRAGMENT: if (ctx->shader_info->info.ps.needs_sample_positions) { - set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, &user_sgpr_idx, 1); + set_loc_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, + &user_sgpr_idx, 1); } break; default: @@ -1121,32 +1133,10 @@ static void create_function(struct nir_to_llvm_context *ctx, ctx->shader_info->num_user_sgprs = user_sgpr_idx; } -static int get_llvm_num_components(LLVMValueRef value) -{ - LLVMTypeRef type = LLVMTypeOf(value); - unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind - ? LLVMGetVectorSize(type) - : 1; - return num_components; -} - -static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac, - LLVMValueRef value, - int index) -{ - int count = get_llvm_num_components(value); - - if (count == 1) - return value; - - return LLVMBuildExtractElement(ac->builder, value, - LLVMConstInt(ac->i32, index, false), ""); -} - static LLVMValueRef trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count) { - unsigned num_components = get_llvm_num_components(value); + unsigned num_components = ac_get_llvm_num_components(value); if (count == num_components) return value; @@ -1348,26 +1338,49 @@ static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, } static LLVMValueRef emit_fsign(struct ac_llvm_context *ctx, - LLVMValueRef src0) + LLVMValueRef src0, + unsigned bitsize) { - LLVMValueRef cmp, val; + LLVMValueRef cmp, val, zero, one; + LLVMTypeRef type; + + if (bitsize == 32) { + type = ctx->f32; + zero = ctx->f32_0; + one = ctx->f32_1; + } else { + type = ctx->f64; + zero = ctx->f64_0; + one = ctx->f64_1; + } - cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32_0, ""); - val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32_1, src0, ""); - cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32_0, ""); - val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), ""); + cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, ""); + val = LLVMBuildSelect(ctx->builder, cmp, one, src0, ""); + cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, ""); + val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(type, -1.0), ""); return val; } static LLVMValueRef emit_isign(struct ac_llvm_context *ctx, - LLVMValueRef src0) + LLVMValueRef src0, unsigned bitsize) { - LLVMValueRef cmp, val; + LLVMValueRef cmp, val, zero, one; + LLVMTypeRef type; - cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32_0, ""); - val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32_1, src0, ""); - cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32_0, ""); - val = LLVMBuildSelect(ctx->builder, cmp, val, 
LLVMConstInt(ctx->i32, -1, true), ""); + if (bitsize == 32) { + type = ctx->i32; + zero = ctx->i32_0; + one = ctx->i32_1; + } else { + type = ctx->i64; + zero = ctx->i64_0; + one = ctx->i64_1; + } + + cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, ""); + val = LLVMBuildSelect(ctx->builder, cmp, one, src0, ""); + cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, ""); + val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(type, -1, true), ""); return val; } @@ -1420,9 +1433,15 @@ static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, } static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, - LLVMValueRef src0) + LLVMValueRef src0, + unsigned bitsize) { - return LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, ""); + LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, ""); + + if (bitsize == 32) + return result; + + return LLVMBuildZExt(ctx->builder, result, ctx->i64, ""); } static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, @@ -1725,7 +1744,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) break; case nir_op_frcp: src[0] = ac_to_float(&ctx->ac, src[0]); - result = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, src[0]); + result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1, + src[0]); break; case nir_op_iand: result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); @@ -1804,11 +1824,11 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]); break; case nir_op_isign: - result = emit_isign(&ctx->ac, src[0]); + result = emit_isign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; case nir_op_fsign: src[0] = ac_to_float(&ctx->ac, src[0]); - result = emit_fsign(&ctx->ac, src[0]); + result = emit_fsign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; case nir_op_ffloor: result = emit_intrin_1f_param(&ctx->ac, "llvm.floor", @@ -1852,7 +1872,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_frsq: result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", ac_to_float_type(&ctx->ac, def_type), src[0]); - result = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, result); + result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1, + result); break; case nir_op_fpow: result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", @@ -1975,7 +1996,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = emit_f2b(&ctx->ac, src[0]); break; case nir_op_b2i: - result = emit_b2i(&ctx->ac, src[0]); + result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; case nir_op_i2b: src[0] = ac_to_integer(&ctx->ac, src[0]); @@ -2271,7 +2292,9 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, case nir_texop_txf: case nir_texop_txf_ms: case nir_texop_samples_identical: - args->opcode = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? ac_image_load : ac_image_load_mip; + args->opcode = lod_is_zero || + instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? 
+ ac_image_load : ac_image_load_mip; args->compare = false; args->offset = false; break; @@ -2447,7 +2470,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, } else { assert(count == 1); - if (get_llvm_num_components(base_data) > 1) + if (ac_get_llvm_num_components(base_data) > 1) data = LLVMBuildExtractElement(ctx->ac.builder, base_data, LLVMConstInt(ctx->ac.i32, start, false), ""); else @@ -2474,9 +2497,9 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, int arg_count = 0; if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { - params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0); + params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0); } - params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0); + params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0); params[arg_count++] = ctx->abi->load_ssbo(ctx->abi, get_src(ctx, instr->src[0]), true); @@ -2588,7 +2611,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { - LLVMValueRef results[8], ret; + LLVMValueRef ret; LLVMValueRef rsrc = get_src(ctx, instr->src[0]); LLVMValueRef offset = get_src(ctx, instr->src[1]); int num_components = instr->num_components; @@ -2599,20 +2622,9 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, if (instr->dest.ssa.bit_size == 64) num_components *= 2; - for (unsigned i = 0; i < num_components; ++i) { - LLVMValueRef params[] = { - rsrc, - LLVMBuildAdd(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, 4 * i, 0), - offset, "") - }; - results[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.load.const.v4i32", ctx->ac.f32, - params, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_LEGACY); - } - - - ret = ac_build_gather_values(&ctx->ac, results, num_components); + ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, + NULL, 0, false, false, true, true); + ret = trim_vector(&ctx->ac, ret, num_components); return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); } @@ -2826,35 +2838,33 @@ get_dw_address(struct nir_to_llvm_context *ctx, } static LLVMValueRef -load_tcs_input(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) +load_tcs_input(struct ac_shader_abi *abi, + LLVMValueRef vertex_index, + LLVMValueRef indir_index, + unsigned const_index, + unsigned location, + unsigned driver_location, + unsigned component, + unsigned num_components, + bool is_patch, + bool is_compact) { + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); LLVMValueRef dw_addr, stride; - unsigned const_index; - LLVMValueRef vertex_index; - LLVMValueRef indir_index; - unsigned param; LLVMValueRef value[4], result; - const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); - const bool is_compact = instr->variables[0]->var->data.compact; - param = shader_io_get_unique_index(instr->variables[0]->var->data.location); - get_deref_offset(ctx->nir, instr->variables[0], - false, NULL, per_vertex ? 
&vertex_index : NULL, - &const_index, &indir_index); + unsigned param = shader_io_get_unique_index(location); stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8); dw_addr = get_tcs_in_current_patch_offset(ctx); dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, indir_index); - unsigned comp = instr->variables[0]->var->data.location_frac; - for (unsigned i = 0; i < instr->num_components + comp; i++) { + for (unsigned i = 0; i < num_components + component; i++) { value[i] = ac_lds_load(&ctx->ac, dw_addr); dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, ""); } - result = ac_build_varying_gather_values(&ctx->ac, value, instr->num_components, comp); - result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); + result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); return result; } @@ -2898,65 +2908,64 @@ load_tcs_output(struct nir_to_llvm_context *ctx, } static void -store_tcs_output(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr, +store_tcs_output(struct ac_shader_abi *abi, + LLVMValueRef vertex_index, + LLVMValueRef param_index, + unsigned const_index, + unsigned location, + unsigned driver_location, LLVMValueRef src, + unsigned component, + bool is_patch, + bool is_compact, unsigned writemask) { + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); LLVMValueRef dw_addr; LLVMValueRef stride = NULL; LLVMValueRef buf_addr = NULL; - LLVMValueRef vertex_index = NULL; - LLVMValueRef indir_index = NULL; - unsigned const_index = 0; unsigned param; - const unsigned comp = instr->variables[0]->var->data.location_frac; - const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); - const bool is_compact = instr->variables[0]->var->data.compact; bool store_lds = true; - if (instr->variables[0]->var->data.patch) { - if (!(ctx->tcs_patch_outputs_read & (1U << (instr->variables[0]->var->data.location - VARYING_SLOT_PATCH0)))) + if (is_patch) { + if (!(ctx->tcs_patch_outputs_read & (1U << (location - VARYING_SLOT_PATCH0)))) store_lds = false; } else { - if (!(ctx->tcs_outputs_read & (1ULL << instr->variables[0]->var->data.location))) + if (!(ctx->tcs_outputs_read & (1ULL << location))) store_lds = false; } - get_deref_offset(ctx->nir, instr->variables[0], - false, NULL, per_vertex ? 
&vertex_index : NULL, - &const_index, &indir_index); - param = shader_io_get_unique_index(instr->variables[0]->var->data.location); - if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 && + param = shader_io_get_unique_index(location); + if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { const_index -= 3; param++; } - if (!instr->variables[0]->var->data.patch) { + if (!is_patch) { stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8); dw_addr = get_tcs_out_current_patch_offset(ctx); } else { dw_addr = get_tcs_out_current_patch_data_offset(ctx); } - mark_tess_output(ctx, instr->variables[0]->var->data.patch, param); + mark_tess_output(ctx, is_patch, param); dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, - indir_index); + param_index); buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact, - vertex_index, indir_index); + vertex_index, param_index); bool is_tess_factor = false; - if (instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || - instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) + if (location == VARYING_SLOT_TESS_LEVEL_INNER || + location == VARYING_SLOT_TESS_LEVEL_OUTER) is_tess_factor = true; unsigned base = is_compact ? const_index : 0; for (unsigned chan = 0; chan < 8; chan++) { if (!(writemask & (1 << chan))) continue; - LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - comp); + LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); if (store_lds || is_tess_factor) ac_lds_store(&ctx->ac, dw_addr, value); @@ -2978,39 +2987,36 @@ store_tcs_output(struct nir_to_llvm_context *ctx, } static LLVMValueRef -load_tes_input(struct nir_to_llvm_context *ctx, - const nir_intrinsic_instr *instr) +load_tes_input(struct ac_shader_abi *abi, + LLVMValueRef vertex_index, + LLVMValueRef param_index, + unsigned const_index, + unsigned location, + unsigned driver_location, + unsigned component, + unsigned num_components, + bool is_patch, + bool is_compact) { + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); LLVMValueRef buf_addr; LLVMValueRef result; - LLVMValueRef vertex_index = NULL; - LLVMValueRef indir_index = NULL; - unsigned const_index = 0; - unsigned param; - const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); - const bool is_compact = instr->variables[0]->var->data.compact; + unsigned param = shader_io_get_unique_index(location); - get_deref_offset(ctx->nir, instr->variables[0], - false, NULL, per_vertex ? 
&vertex_index : NULL, - &const_index, &indir_index); - param = shader_io_get_unique_index(instr->variables[0]->var->data.location); - if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 && - is_compact && const_index > 3) { + if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { const_index -= 3; param++; } - unsigned comp = instr->variables[0]->var->data.location_frac; buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, - is_compact, vertex_index, indir_index); + is_compact, vertex_index, param_index); - LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, comp * 4, false); + LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 4, false); buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, ""); - result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL, + result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, num_components, NULL, buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false); - result = trim_vector(&ctx->ac, result, instr->num_components); - result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); + result = trim_vector(&ctx->ac, result, num_components); return result; } @@ -3114,6 +3120,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, LLVMValueRef indir_index; LLVMValueRef ret; unsigned const_index; + unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4; bool vs_in = ctx->stage == MESA_SHADER_VERTEX && instr->variables[0]->var->data.mode == nir_var_shader_in; get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL, @@ -3124,10 +3131,29 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, switch (instr->variables[0]->var->data.mode) { case nir_var_shader_in: - if (ctx->stage == MESA_SHADER_TESS_CTRL) - return load_tcs_input(ctx->nctx, instr); - if (ctx->stage == MESA_SHADER_TESS_EVAL) - return load_tes_input(ctx->nctx, instr); + if (ctx->stage == MESA_SHADER_TESS_CTRL || + ctx->stage == MESA_SHADER_TESS_EVAL) { + LLVMValueRef result; + LLVMValueRef vertex_index = NULL; + LLVMValueRef indir_index = NULL; + unsigned const_index = 0; + unsigned location = instr->variables[0]->var->data.location; + unsigned driver_location = instr->variables[0]->var->data.driver_location; + const bool is_patch = instr->variables[0]->var->data.patch; + const bool is_compact = instr->variables[0]->var->data.compact; + + get_deref_offset(ctx, instr->variables[0], + false, NULL, is_patch ? 
NULL : &vertex_index, + &const_index, &indir_index); + + result = ctx->abi->load_tess_inputs(ctx->abi, vertex_index, indir_index, + const_index, location, driver_location, + instr->variables[0]->var->data.location_frac, + instr->num_components, + is_patch, is_compact); + return LLVMBuildBitCast(ctx->ac.builder, result, get_def_type(ctx, &instr->dest.ssa), ""); + } + if (ctx->stage == MESA_SHADER_GEOMETRY) { LLVMValueRef indir_index; unsigned const_index, vertex_index; @@ -3149,13 +3175,13 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( &ctx->ac, ctx->abi->inputs + idx + chan, count, - 4, false, true); + stride, false, true); values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); } else - values[chan] = ctx->abi->inputs[idx + chan + const_index * 4]; + values[chan] = ctx->abi->inputs[idx + chan + const_index * stride]; } break; case nir_var_local: @@ -3166,13 +3192,13 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( &ctx->ac, ctx->locals + idx + chan, count, - 4, true, true); + stride, true, true); values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); } else { - values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * 4], ""); + values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], ""); } } break; @@ -3195,14 +3221,14 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( &ctx->ac, ctx->outputs + idx + chan, count, - 4, true, true); + stride, true, true); values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); } else { values[chan] = LLVMBuildLoad(ctx->ac.builder, - ctx->outputs[idx + chan + const_index * 4], + ctx->outputs[idx + chan + const_index * stride], ""); } } @@ -3232,7 +3258,7 @@ visit_store_var(struct ac_nir_context *ctx, int old_writemask = writemask; src = LLVMBuildBitCast(ctx->ac.builder, src, - LLVMVectorType(ctx->ac.f32, get_llvm_num_components(src) * 2), + LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2), ""); writemask = 0; @@ -3246,7 +3272,22 @@ visit_store_var(struct ac_nir_context *ctx, case nir_var_shader_out: if (ctx->stage == MESA_SHADER_TESS_CTRL) { - store_tcs_output(ctx->nctx, instr, src, writemask); + LLVMValueRef vertex_index = NULL; + LLVMValueRef indir_index = NULL; + unsigned const_index = 0; + const unsigned location = instr->variables[0]->var->data.location; + const unsigned driver_location = instr->variables[0]->var->data.driver_location; + const unsigned comp = instr->variables[0]->var->data.location_frac; + const bool is_patch = instr->variables[0]->var->data.patch; + const bool is_compact = instr->variables[0]->var->data.compact; + + get_deref_offset(ctx, instr->variables[0], + false, NULL, is_patch ? 
NULL : &vertex_index, + &const_index, &indir_index); + + ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index, + const_index, location, driver_location, + src, comp, is_patch, is_compact, writemask); return; } @@ -3255,7 +3296,7 @@ visit_store_var(struct ac_nir_context *ctx, if (!(writemask & (1 << chan))) continue; - value = llvm_extract_elem(&ctx->ac, src, chan - comp); + value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp); if (instr->variables[0]->var->data.compact) stride = 1; @@ -3284,7 +3325,7 @@ visit_store_var(struct ac_nir_context *ctx, if (!(writemask & (1 << chan))) continue; - value = llvm_extract_elem(&ctx->ac, src, chan); + value = ac_llvm_extract_elem(&ctx->ac, src, chan); if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); @@ -3324,8 +3365,8 @@ visit_store_var(struct ac_nir_context *ctx, LLVMValueRef ptr = LLVMBuildStructGEP(ctx->ac.builder, address, chan, ""); - LLVMValueRef src = llvm_extract_elem(&ctx->ac, val, - chan); + LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val, + chan); src = LLVMBuildBitCast( ctx->ac.builder, src, LLVMGetElementType(LLVMTypeOf(ptr)), ""); @@ -3457,7 +3498,7 @@ static LLVMValueRef get_image_coords(struct ac_nir_context *ctx, LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), }; LLVMValueRef res; - LLVMValueRef sample_index = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0); + LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0); int count; enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); @@ -3504,7 +3545,7 @@ static LLVMValueRef get_image_coords(struct ac_nir_context *ctx, if (is_ms) count--; for (chan = 0; chan < count; ++chan) { - coords[chan] = llvm_extract_elem(&ctx->ac, src0, chan); + coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan); } if (add_frag_pos) { for (chan = 0; chan < 2; ++chan) @@ -3812,19 +3853,18 @@ static void emit_membar(struct nir_to_llvm_context *ctx, ac_build_waitcnt(&ctx->ac, waitcnt); } -static void emit_barrier(struct nir_to_llvm_context *ctx) +static void emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) { /* SI only (thanks to a hw bug workaround): * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. 
*/ - if (ctx->options->chip_class == SI && - ctx->stage == MESA_SHADER_TESS_CTRL) { - ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT); + if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) { + ac_build_waitcnt(ac, LGKM_CNT & VM_CNT); return; } - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier", - ctx->ac.voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); + ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier", + ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); } static void emit_discard_if(struct ac_nir_context *ctx, @@ -4137,9 +4177,11 @@ visit_end_primitive(struct nir_to_llvm_context *ctx, } static LLVMValueRef -visit_load_tess_coord(struct nir_to_llvm_context *ctx, - const nir_intrinsic_instr *instr) +load_tess_coord(struct ac_shader_abi *abi, LLVMTypeRef type, + unsigned num_components) { + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); + LLVMValueRef coord[4] = { ctx->tes_u, ctx->tes_v, @@ -4151,9 +4193,15 @@ visit_load_tess_coord(struct nir_to_llvm_context *ctx, coord[2] = LLVMBuildFSub(ctx->builder, ctx->ac.f32_1, LLVMBuildFAdd(ctx->builder, coord[0], coord[1], ""), ""); - LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, instr->num_components); - return LLVMBuildBitCast(ctx->builder, result, - get_def_type(ctx->nir, &instr->dest.ssa), ""); + LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, num_components); + return LLVMBuildBitCast(ctx->builder, result, type, ""); +} + +static LLVMValueRef +load_patch_vertices_in(struct ac_shader_abi *abi) +{ + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); + return LLVMConstInt(ctx->ac.i32, ctx->options->key.tcs.input_vertices, false); } static void visit_intrinsic(struct ac_nir_context *ctx, @@ -4196,21 +4244,17 @@ static void visit_intrinsic(struct ac_nir_context *ctx, break; case nir_intrinsic_load_invocation_id: if (ctx->stage == MESA_SHADER_TESS_CTRL) - result = unpack_param(&ctx->ac, ctx->nctx->tcs_rel_ids, 8, 5); + result = unpack_param(&ctx->ac, ctx->abi->tcs_rel_ids, 8, 5); else result = ctx->abi->gs_invocation_id; break; case nir_intrinsic_load_primitive_id: if (ctx->stage == MESA_SHADER_GEOMETRY) { - if (ctx->nctx) - ctx->nctx->shader_info->gs.uses_prim_id = true; result = ctx->abi->gs_prim_id; } else if (ctx->stage == MESA_SHADER_TESS_CTRL) { - ctx->nctx->shader_info->tcs.uses_prim_id = true; - result = ctx->nctx->tcs_patch_id; + result = ctx->abi->tcs_patch_id; } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { - ctx->nctx->shader_info->tcs.uses_prim_id = true; - result = ctx->nctx->tes_patch_id; + result = ctx->abi->tes_patch_id; } else fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage); break; @@ -4320,7 +4364,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx, emit_membar(ctx->nctx, instr); break; case nir_intrinsic_barrier: - emit_barrier(ctx->nctx); + emit_barrier(&ctx->ac, ctx->stage); break; case nir_intrinsic_var_atomic_add: case nir_intrinsic_var_atomic_imin: @@ -4346,11 +4390,21 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_end_primitive: visit_end_primitive(ctx->nctx, instr); break; - case nir_intrinsic_load_tess_coord: - result = visit_load_tess_coord(ctx->nctx, instr); + case nir_intrinsic_load_tess_coord: { + LLVMTypeRef type = ctx->nctx ? 
+ get_def_type(ctx->nctx->nir, &instr->dest.ssa) : + NULL; + result = ctx->abi->load_tess_coord(ctx->abi, type, instr->num_components); + break; + } + case nir_intrinsic_load_tess_level_outer: + result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER); + break; + case nir_intrinsic_load_tess_level_inner: + result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER); break; case nir_intrinsic_load_patch_vertices_in: - result = LLVMConstInt(ctx->ac.i32, ctx->nctx->options->key.tcs.input_vertices, false); + result = ctx->abi->load_patch_vertices_in(ctx->abi); break; default: fprintf(stderr, "Unknown intrinsic: "); @@ -4702,7 +4756,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) if (coord) for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = llvm_extract_elem(&ctx->ac, coord, chan); + coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); if (offsets && instr->op != nir_texop_txf) { LLVMValueRef offset[3], pack; @@ -4710,8 +4764,8 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) offset[chan] = ctx->ac.i32_0; args.offset = true; - for (chan = 0; chan < get_llvm_num_components(offsets); chan++) { - offset[chan] = llvm_extract_elem(&ctx->ac, offsets, chan); + for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) { + offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan); offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); if (chan) @@ -4731,7 +4785,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) /* Pack depth comparison value */ if (instr->is_shadow && comparator) { LLVMValueRef z = ac_to_float(&ctx->ac, - llvm_extract_elem(&ctx->ac, comparator, 0)); + ac_llvm_extract_elem(&ctx->ac, comparator, 0)); /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT, * so the depth comparison value isn't clamped for Z16 and @@ -4775,8 +4829,8 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) } for (unsigned i = 0; i < num_src_deriv_channels; i++) { - derivs[i] = ac_to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddx, i)); - derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddy, i)); + derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i)); + derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i)); } for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) { derivs[i] = ctx->ac.f32_0; @@ -5158,8 +5212,13 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx, if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) { buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id, ctx->abi.start_instance, ""); - ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3, - ctx->shader_info->vs.vgpr_comp_cnt); + if (ctx->options->key.vs.as_ls) { + ctx->shader_info->vs.vgpr_comp_cnt = + MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt); + } else { + ctx->shader_info->vs.vgpr_comp_cnt = + MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt); + } } else buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id, ctx->abi.base_vertex, ""); @@ -5535,6 +5594,7 @@ setup_locals(struct ac_nir_context *ctx, nir_foreach_variable(variable, &func->impl->locals) { unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); variable->data.driver_location = ctx->num_locals * 4; + variable->data.location_frac = 0; ctx->num_locals += attrib_count; } ctx->locals = 
malloc(4 * ctx->num_locals * sizeof(LLVMValueRef)); @@ -6142,13 +6202,13 @@ write_tess_factors(struct nir_to_llvm_context *ctx) { unsigned stride, outer_comps, inner_comps; struct ac_build_if_state if_ctx, inner_if_ctx; - LLVMValueRef invocation_id = unpack_param(&ctx->ac, ctx->tcs_rel_ids, 8, 5); - LLVMValueRef rel_patch_id = unpack_param(&ctx->ac, ctx->tcs_rel_ids, 0, 8); + LLVMValueRef invocation_id = unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 8, 5); + LLVMValueRef rel_patch_id = unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8); unsigned tess_inner_index, tess_outer_index; LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer; LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4]; int i; - emit_barrier(ctx); + emit_barrier(&ctx->ac, ctx->stage); switch (ctx->options->key.tcs.primitive_mode) { case GL_ISOLINES: @@ -6532,8 +6592,8 @@ static void ac_nir_fixup_ls_hs_input_vgprs(struct nir_to_llvm_context *ctx) ctx->ac.i32_0, ""); ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, ""); ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, ""); - ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->tcs_rel_ids, ctx->rel_auto_id, ""); - ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->tcs_patch_id, ctx->abi.vertex_id, ""); + ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, ""); + ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, ""); } static void prepare_gs_input_vgprs(struct nir_to_llvm_context *ctx) @@ -6675,19 +6735,31 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { ctx.tcs_outputs_read = shaders[i]->info.outputs_read; ctx.tcs_patch_outputs_read = shaders[i]->info.patch_outputs_read; + ctx.abi.load_tess_inputs = load_tcs_input; + ctx.abi.load_patch_vertices_in = load_patch_vertices_in; + ctx.abi.store_tcs_outputs = store_tcs_output; } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) { ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode; + ctx.abi.load_tess_inputs = load_tes_input; + ctx.abi.load_tess_coord = load_tess_coord; + ctx.abi.load_patch_vertices_in = load_patch_vertices_in; } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) { if (shader_info->info.vs.needs_instance_id) { - ctx.shader_info->vs.vgpr_comp_cnt = - MAX2(3, ctx.shader_info->vs.vgpr_comp_cnt); + if (ctx.ac.chip_class == GFX9 && + shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) { + ctx.shader_info->vs.vgpr_comp_cnt = + MAX2(2, ctx.shader_info->vs.vgpr_comp_cnt); + } else { + ctx.shader_info->vs.vgpr_comp_cnt = + MAX2(1, ctx.shader_info->vs.vgpr_comp_cnt); + } } } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) { shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard; } if (i) - emit_barrier(&ctx); + emit_barrier(&ctx.ac, ctx.stage); ac_setup_rings(&ctx); @@ -6873,6 +6945,20 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm, /* +3 for scratch wave offset and VCC */ config->num_sgprs = MAX2(config->num_sgprs, shader_info->num_input_sgprs + 3); + + /* Enable 64-bit and 16-bit denormals, because there is no performance + * cost. + * + * If denormals are enabled, all floating-point output modifiers are + * ignored. 
+ * + * Don't enable denormals for 32-bit floats, because: + * - Floating-point output modifiers would be ignored by the hw. + * - Some opcodes don't support denormals, such as v_mad_f32. We would + * have to stop using those. + * - SI & CI would be very slow. + */ + config->float_mode |= V_00B028_FP_64_DENORMS; } static void @@ -6905,7 +6991,7 @@ ac_fill_shader_info(struct ac_shader_variant_info *shader_info, struct nir_shade case MESA_SHADER_VERTEX: shader_info->vs.as_es = options->key.vs.as_es; shader_info->vs.as_ls = options->key.vs.as_ls; - /* in LS mode we need at least 1, invocation id needs 3, handled elsewhere */ + /* in LS mode we need at least 1, invocation id needs 2, handled elsewhere */ if (options->key.vs.as_ls) shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt); break; @@ -6930,6 +7016,14 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm, ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill); for (int i = 0; i < nir_count; ++i) ac_fill_shader_info(shader_info, nir[i], options); + + /* Determine the ES type (VS or TES) for the GS on GFX9. */ + if (options->chip_class == GFX9) { + if (nir_count == 2 && + nir[1]->info.stage == MESA_SHADER_GEOMETRY) { + shader_info->gs.es_type = nir[0]->info.stage; + } + } } static void
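
Annotations: the blocks below are standalone C sketches of constructs this patch touches. They are illustrative notes under stated assumptions, not part of the patch; every name suffixed _sketch is hypothetical.

get_rel_patch_id() and write_tess_factors() read fields out of the packed tcs_rel_ids SGPR with unpack_param(&ctx->ac, ..., offset, width). A scalar sketch of the shift-and-mask this expands to, assuming the packing implied by the calls in this patch (relative patch id in bits [0,8), TCS invocation id in bits [8,13)):

#include <stdint.h>

static uint32_t unpack_param_sketch(uint32_t param, unsigned rshift,
                                    unsigned bitwidth)
{
    uint32_t v = param >> rshift;
    if (bitwidth < 32)
        v &= (1u << bitwidth) - 1; /* keep only 'bitwidth' bits */
    return v;
}

/* rel_patch_id  = unpack_param_sketch(tcs_rel_ids, 0, 8);
 * invocation_id = unpack_param_sketch(tcs_rel_ids, 8, 5); */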
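
set_userdata_location() and its descriptor-set variant are folded into one set_loc() helper that records where a value lives in the user-SGPR file and advances the allocation cursor. A minimal self-contained sketch of that cursor pattern, with a simplified stand-in for ac_userdata_info:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct userdata_info_sketch {
    int8_t   sgpr_idx;        /* first user SGPR holding the value */
    uint8_t  num_sgprs;       /* consecutive SGPRs it occupies */
    bool     indirect;        /* loaded through a pointer instead? */
    uint32_t indirect_offset; /* byte offset when indirect */
};

static void set_loc_sketch(struct userdata_info_sketch *ud,
                           uint8_t *sgpr_idx, uint8_t num_sgprs,
                           uint32_t indirect_offset)
{
    ud->sgpr_idx = *sgpr_idx;
    ud->num_sgprs = num_sgprs;
    ud->indirect = indirect_offset > 0;
    ud->indirect_offset = indirect_offset;
    *sgpr_idx += num_sgprs; /* advance the cursor for the next entry */
}

int main(void)
{
    struct userdata_info_sketch scratch, push;
    uint8_t cursor = 0;

    set_loc_sketch(&scratch, &cursor, 2, 0); /* e.g. AC_UD_SCRATCH_RING_OFFSETS */
    set_loc_sketch(&push, &cursor, 2, 0);    /* e.g. AC_UD_PUSH_CONSTANTS */
    printf("push constants at s%d, next free SGPR: %d\n",
           push.sgpr_idx, cursor);
    return 0;
}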
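
allocate_user_sgprs() now budgets against 32 user SGPRs on GFX9 instead of 16. Each directly bound descriptor set costs one 64-bit pointer, i.e. two SGPRs; when the sets the shader actually uses no longer fit in the remaining budget, it falls back to a single indirect pointer to a table of sets. Sketch of that decision:

#include <stdbool.h>
#include <stdint.h>

static unsigned popcount32_sketch(uint32_t v)
{
    unsigned n = 0;
    for (; v; v &= v - 1) /* clear lowest set bit each iteration */
        n++;
    return n;
}

static bool need_indirect_descriptor_sets_sketch(bool gfx9,
                                                 unsigned used_sgprs,
                                                 uint32_t desc_set_used_mask)
{
    unsigned available = gfx9 ? 32 : 16; /* user SGPRs on this chip */
    unsigned remaining = available - used_sgprs;

    /* two SGPRs per directly bound set pointer */
    return remaining / 2 < popcount32_sketch(desc_set_used_mask);
}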
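
emit_fsign()/emit_isign() gain a bitsize parameter but keep the same branchless lowering: two compare+select pairs per type. Scalar equivalents, which also show the edge cases the select chain produces:

#include <stdint.h>

static double fsign_sketch(double x)
{
    double val = (x > 0.0) ? 1.0 : x; /* LLVMRealOGT + select */
    /* +0.0 and -0.0 pass through; NaN fails both ordered compares
     * and therefore ends up as -1.0 */
    return (val >= 0.0) ? val : -1.0; /* LLVMRealOGE + select */
}

static int64_t isign_sketch(int64_t x)
{
    int64_t val = (x > 0) ? 1 : x; /* LLVMIntSGT + select */
    return (val >= 0) ? val : -1;  /* LLVMIntSGE + select */
}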
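
emit_b2i() masks the 32-bit NIR boolean down to 0/1 and, for 64-bit destinations, appends the zero-extension this patch adds. Scalar sketch:

#include <stdint.h>

static uint64_t b2i64_sketch(uint32_t nir_bool)
{
    uint32_t result = nir_bool & 1u; /* NIR booleans are 0 or ~0u here */
    return (uint64_t)result;         /* the added ZExt for bit_size == 64 */
}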
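
visit_load_ubo_buffer() replaces the per-dword llvm.SI.load.const calls with a single ac_build_buffer_load() (num_components is doubled first for 64-bit results), then trims the vector back to the requested width before the final bitcast. The trim step on a plain array, assuming the load may return more dwords than were asked for:

static void trim_vector_sketch(const float *in, float *out, unsigned count)
{
    /* keep only the first 'count' components of a wider load result */
    for (unsigned i = 0; i < count; i++)
        out[i] = in[i];
}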
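
load_tcs_input() resolves inputs from LDS: the per-vertex stride comes from bits [13,21) of tcs_in_layout (the unpack_param(..., 13, 8) call) and data is read one dword per channel. Sketch of the gather loop, assuming dword-granular LDS addressing:

#include <stdint.h>

/* value[] must cover component + num_components channels (at most 8) */
static void load_tcs_input_sketch(const uint32_t *lds, uint32_t base_dw_addr,
                                  unsigned component, unsigned num_components,
                                  uint32_t value[8])
{
    uint32_t dw_addr = base_dw_addr; /* patch base + vertex * stride + slot */
    for (unsigned i = 0; i < num_components + component; i++) {
        value[i] = lds[dw_addr]; /* ac_lds_load() */
        dw_addr += 1;            /* one dword per channel */
    }
    /* ac_build_varying_gather_values() then keeps channels
     * [component, component + num_components) */
}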
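
store_tcs_output() iterates a writemask of up to eight channels (64-bit values are split into dword pairs, which is why the mask can exceed four bits) and indexes the source relative to the variable's first component. Sketch, assuming consecutive channels land one dword apart:

#include <stdint.h>

static void store_tcs_output_sketch(uint32_t *lds, uint32_t dw_addr,
                                    const uint32_t *src, unsigned component,
                                    unsigned writemask)
{
    for (unsigned chan = 0; chan < 8; chan++) {
        if (!(writemask & (1u << chan)))
            continue;
        /* ac_llvm_extract_elem(src, chan - component) in the patch */
        lds[dw_addr + chan] = src[chan - component];
    }
}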
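
visit_load_var() now honours compact variables (e.g. gl_ClipDistance): they pack one scalar per array element, so indexing uses a stride of 1 instead of the usual 4 channels per driver location. Sketch of the indexing:

#include <stdbool.h>

static float load_var_channel_sketch(const float *storage,
                                     unsigned driver_location, unsigned chan,
                                     unsigned const_index, bool compact)
{
    unsigned stride = compact ? 1 : 4; /* scalars per array element */
    return storage[driver_location + chan + const_index * stride];
}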
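
load_tess_coord() receives only u and v in VGPRs; for triangle domains the third barycentric coordinate is reconstructed as w = 1 - (u + v). For example, u = 0.25, v = 0.5 yields w = 0.25. Scalar sketch:

#include <stdbool.h>

static void tess_coord_sketch(float u, float v, bool triangles,
                              float coord[3])
{
    coord[0] = u;
    coord[1] = v;
    coord[2] = triangles ? 1.0f - (u + v) : 0.0f;
}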
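
With the reordered VS input VGPRs in declare_vs_input_vgprs(), instance_id sits in VGPR2 in the LS layout and VGPR1 otherwise, so instanced fetches in handle_vs_input_decl() only raise vs.vgpr_comp_cnt to 2 or 1 instead of the old 3. Sketch of the bookkeeping, treating vgpr_comp_cnt as the highest system-value VGPR index the hardware must load (an assumption about the field's meaning):

#include <stdbool.h>

static unsigned vs_vgpr_comp_cnt_sketch(unsigned current,
                                        bool uses_instance_id, bool as_ls)
{
    if (!uses_instance_id)
        return current;
    unsigned needed = as_ls ? 2 : 1;            /* VGPR holding instance_id */
    return needed > current ? needed : current; /* MAX2 */
}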
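
On GFX9 the geometry shader is merged with the stage that feeds it, so ac_compile_nir_shader() records whether the ES half of the pair was a VS or a TES. Sketch of that decision, with a simplified stage enum standing in for gl_shader_stage:

#include <stdbool.h>

enum sketch_stage { SKETCH_VERTEX, SKETCH_TESS_EVAL, SKETCH_GEOMETRY };

/* Returns the ES stage recorded for a merged GFX9 GS, or -1 when the
 * shader pair is not a merged ES+GS. */
static int gs_es_type_sketch(const enum sketch_stage *stages, int nir_count,
                             bool is_gfx9)
{
    if (is_gfx9 && nir_count == 2 && stages[1] == SKETCH_GEOMETRY)
        return stages[0]; /* SKETCH_VERTEX or SKETCH_TESS_EVAL */
    return -1;
}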