From: Marek Olšák Date: Mon, 1 Jan 2018 20:04:22 +0000 (+0100) Subject: radeonsi: implement 32-bit pointers in user data SGPRs (v2) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=931ec80eebbfd6a301a828daa0c9945f49533611;p=mesa.git radeonsi: implement 32-bit pointers in user data SGPRs (v2) User SGPRs changes: VS: 14 -> 9 TCS: 14 -> 10 TES: 10 -> 6 GS: 8 -> 4 GSCOPY: 2 -> 1 PS: 9 -> 5 Merged VS-TCS: 24 -> 16 Merged VS-GS: 18 -> 11 Merged TES-GS: 18 -> 11 SGPRS: 2170102 -> 2158430 (-0.54 %) VGPRS: 1645656 -> 1641516 (-0.25 %) Spilled SGPRs: 9078 -> 8810 (-2.95 %) Spilled VGPRs: 130 -> 114 (-12.31 %) Scratch size: 1508 -> 1492 (-1.06 %) dwords per thread Code Size: 52094872 -> 52692540 (1.15 %) bytes Max Waves: 371848 -> 372723 (0.24 %) v2: - the shader cache needs to take address32_hi into account - set amdgpu-32bit-address-high-bits Reviewed-by: Samuel Pitoiset (v1) --- diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index f3d4effbd05..15144addb9b 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -64,6 +64,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context, ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); + ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64; ctx->f16 = LLVMHalfTypeInContext(ctx->context); ctx->f32 = LLVMFloatTypeInContext(ctx->context); ctx->f64 = LLVMDoubleTypeInContext(ctx->context); @@ -158,7 +159,10 @@ ac_get_type_size(LLVMTypeRef type) case LLVMFloatTypeKind: return 4; case LLVMDoubleTypeKind: + return 8; case LLVMPointerTypeKind: + if (LLVMGetPointerAddressSpace(type) == AC_CONST_32BIT_ADDR_SPACE) + return 4; return 8; case LLVMVectorTypeKind: return LLVMGetVectorSize(type) * @@ -2051,3 +2055,12 @@ LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type) return LLVMPointerType(LLVMArrayType(elem_type, 0), AC_CONST_ADDR_SPACE); } + +LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type) +{ + if (!HAVE_32BIT_POINTERS) + return ac_array_in_const_addr_space(elem_type); + + return LLVMPointerType(LLVMArrayType(elem_type, 0), + AC_CONST_32BIT_ADDR_SPACE); +} diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index a4d80cd8b00..0a49ad8ca13 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -34,10 +34,13 @@ extern "C" { #endif +#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700) + enum { /* CONST is the only address space that selects SMEM loads */ AC_CONST_ADDR_SPACE = HAVE_LLVM >= 0x700 ? 4 : 2, AC_LOCAL_ADDR_SPACE = 3, + AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer type has 32 bits */ }; struct ac_llvm_context { @@ -51,6 +54,7 @@ struct ac_llvm_context { LLVMTypeRef i16; LLVMTypeRef i32; LLVMTypeRef i64; + LLVMTypeRef intptr; LLVMTypeRef f16; LLVMTypeRef f32; LLVMTypeRef f64; @@ -355,6 +359,7 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMValueRef src0); LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type); +LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type); #ifdef __cplusplus } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 96525fd21b0..f28bd0ffb0a 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -2003,17 +2003,22 @@ static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs, unsigned sh_base, unsigned pointer_count) { - radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0)); + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (HAVE_32BIT_POINTERS ? 1 : 2), 0)); radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2); } -static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs, +static void si_emit_shader_pointer_body(struct si_screen *sscreen, + struct radeon_winsys_cs *cs, struct si_descriptors *desc) { uint64_t va = desc->gpu_address; radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + + if (HAVE_32BIT_POINTERS) + assert((va >> 32) == sscreen->info.address32_hi); + else + radeon_emit(cs, va >> 32); } static void si_emit_shader_pointer(struct si_context *sctx, @@ -2023,7 +2028,7 @@ static void si_emit_shader_pointer(struct si_context *sctx, struct radeon_winsys_cs *cs = sctx->b.gfx.cs; si_emit_shader_pointer_head(cs, desc, sh_base, 1); - si_emit_shader_pointer_body(cs, desc); + si_emit_shader_pointer_body(sctx->screen, cs, desc); } static void si_emit_consecutive_shader_pointers(struct si_context *sctx, @@ -2044,7 +2049,7 @@ static void si_emit_consecutive_shader_pointers(struct si_context *sctx, si_emit_shader_pointer_head(cs, descs, sh_base, count); for (int i = 0; i < count; i++) - si_emit_shader_pointer_body(cs, descs + i); + si_emit_shader_pointer_body(sctx->screen, cs, descs + i); } } @@ -2566,8 +2571,10 @@ void si_init_all_descriptors(struct si_context *sctx) { int i; +#if !HAVE_32BIT_POINTERS STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0); STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0); +#endif for (i = 0; i < SI_NUM_SHADERS; i++) { bool gfx9_tcs = false; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 11915453082..f07ec50ab7b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -634,12 +634,18 @@ static void si_disk_cache_create(struct si_screen *sscreen) if (res != -1) { /* These flags affect shader compilation. */ - uint64_t shader_debug_flags = - sscreen->debug_flags & - (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | - DBG(SI_SCHED) | - DBG(UNSAFE_MATH) | - DBG(NIR)); + #define ALL_FLAGS (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | \ + DBG(SI_SCHED) | \ + DBG(UNSAFE_MATH) | \ + DBG(NIR)) + uint64_t shader_debug_flags = sscreen->debug_flags & + ALL_FLAGS; + + /* Add the high bits of 32-bit addresses, which affects + * how 32-bit addresses are expanded to 64 bits. + */ + STATIC_ASSERT(ALL_FLAGS <= UINT_MAX); + shader_debug_flags |= (uint64_t)sscreen->info.address32_hi << 32; sscreen->disk_shader_cache = disk_cache_create(si_get_family_name(sscreen), diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ec03f537d0c..1f2338ad6d0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3227,12 +3227,18 @@ si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret, } static LLVMValueRef -si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef ret, - unsigned param, unsigned return_index) +si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret, + unsigned param, unsigned return_index) { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef ptr, lo, hi; + if (HAVE_32BIT_POINTERS) { + ptr = LLVMGetParam(ctx->main_fn, param); + ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, ""); + return LLVMBuildInsertValue(builder, ret, ptr, return_index, ""); + } + ptr = LLVMGetParam(ctx->main_fn, param); ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, ""); ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, ""); @@ -3348,11 +3354,11 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, - 8 + SI_SGPR_RW_BUFFERS); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, - ctx->param_bindless_samplers_and_images, - 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); + ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers, + 8 + SI_SGPR_RW_BUFFERS); + ret = si_insert_input_ptr(ctx, ret, + ctx->param_bindless_samplers_and_images, + 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS); @@ -3367,11 +3373,12 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k, 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K); - unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2; - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param, - 8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1, - 8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES); + unsigned desc_param = ctx->param_tcs_factor_addr_base64k + + (HAVE_32BIT_POINTERS ? 1 : 2); + ret = si_insert_input_ptr(ctx, ret, desc_param, + 8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS); + ret = si_insert_input_ptr(ctx, ret, desc_param + 1, + 8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES); unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR; ret = LLVMBuildInsertValue(ctx->ac.builder, ret, @@ -3392,17 +3399,17 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, - 8 + SI_SGPR_RW_BUFFERS); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, - ctx->param_bindless_samplers_and_images, - 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); + ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers, + 8 + SI_SGPR_RW_BUFFERS); + ret = si_insert_input_ptr(ctx, ret, + ctx->param_bindless_samplers_and_images, + 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); unsigned desc_param = ctx->param_vs_state_bits + 1; - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param, - 8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1, - 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES); + ret = si_insert_input_ptr(ctx, ret, desc_param, + 8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS); + ret = si_insert_input_ptr(ctx, ret, desc_param + 1, + 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES); unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR; for (unsigned i = 0; i < 5; i++) { @@ -4395,6 +4402,9 @@ static void si_create_function(struct si_shader_context *ctx, *fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i); } + si_llvm_add_attribute(ctx->main_fn, "amdgpu-32bit-address-high-bits", + ctx->screen->info.address32_hi); + if (max_workgroup_size) { si_llvm_add_attribute(ctx->main_fn, "amdgpu-max-work-group-size", max_workgroup_size); @@ -4491,11 +4501,11 @@ static void declare_per_stage_desc_pointers(struct si_shader_context *ctx, unsigned const_and_shader_buffers = add_arg(fninfo, ARG_SGPR, - ac_array_in_const_addr_space(const_shader_buf_type)); + ac_array_in_const32_addr_space(const_shader_buf_type)); unsigned samplers_and_images = add_arg(fninfo, ARG_SGPR, - ac_array_in_const_addr_space(ctx->v8i32)); + ac_array_in_const32_addr_space(ctx->v8i32)); if (assign_params) { ctx->param_const_and_shader_buffers = const_and_shader_buffers; @@ -4507,16 +4517,16 @@ static void declare_global_desc_pointers(struct si_shader_context *ctx, struct si_function_info *fninfo) { ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR, - ac_array_in_const_addr_space(ctx->v4i32)); + ac_array_in_const32_addr_space(ctx->v4i32)); ctx->param_bindless_samplers_and_images = add_arg(fninfo, ARG_SGPR, - ac_array_in_const_addr_space(ctx->v8i32)); + ac_array_in_const32_addr_space(ctx->v8i32)); } static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx, struct si_function_info *fninfo) { ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR, - ac_array_in_const_addr_space(ctx->v4i32)); + ac_array_in_const32_addr_space(ctx->v4i32)); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id); @@ -4684,7 +4694,8 @@ static void create_function(struct si_shader_context *ctx) ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + if (!HAVE_32BIT_POINTERS) + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ declare_per_stage_desc_pointers(ctx, &fninfo, ctx->type == PIPE_SHADER_TESS_CTRL); @@ -4740,7 +4751,8 @@ static void create_function(struct si_shader_context *ctx) ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + if (!HAVE_32BIT_POINTERS) + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ } @@ -6475,6 +6487,11 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, unsigned size = ac_get_type_size(param_type) / 4; if (size == 1) { + if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) { + param = LLVMBuildPtrToInt(builder, param, ctx->i32, ""); + param_type = ctx->i32; + } + if (param_type != out_type) param = LLVMBuildBitCast(builder, param, out_type, ""); out[num_out++] = param; @@ -6550,8 +6567,14 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, if (LLVMTypeOf(arg) != param_type) { if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) { - arg = LLVMBuildBitCast(builder, arg, ctx->i64, ""); - arg = LLVMBuildIntToPtr(builder, arg, param_type, ""); + if (LLVMGetPointerAddressSpace(param_type) == + AC_CONST_32BIT_ADDR_SPACE) { + arg = LLVMBuildBitCast(builder, arg, ctx->i32, ""); + arg = LLVMBuildIntToPtr(builder, arg, param_type, ""); + } else { + arg = LLVMBuildBitCast(builder, arg, ctx->i64, ""); + arg = LLVMBuildIntToPtr(builder, arg, param_type, ""); + } } else { arg = LLVMBuildBitCast(builder, arg, param_type, ""); } @@ -7026,9 +7049,16 @@ static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx) ctx->type == PIPE_SHADER_GEOMETRY || ctx->shader->key.as_ls || ctx->shader->key.as_es); + if (HAVE_32BIT_POINTERS) { + ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); + list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], + ac_array_in_const32_addr_space(ctx->v4i32), ""); + return list; + } + /* Get the pointer to rw buffers. */ ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); - ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS_HI); + ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS + 1); list = lp_build_gather_values(&ctx->gallivm, ptr, 2); list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, ""); list = LLVMBuildIntToPtr(ctx->ac.builder, list, @@ -7241,11 +7271,11 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -7256,10 +7286,10 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); } else { - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -7661,10 +7691,10 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx, si_init_function_info(&fninfo); /* Declare input SGPRs. */ - ctx->param_rw_buffers = add_arg(&fninfo, ARG_SGPR, ctx->i64); - ctx->param_bindless_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->i64); - ctx->param_const_and_shader_buffers = add_arg(&fninfo, ARG_SGPR, ctx->i64); - ctx->param_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->i64); + ctx->param_rw_buffers = add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + ctx->param_bindless_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + ctx->param_const_and_shader_buffers = add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + ctx->param_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg_checked(&fninfo, ARG_SGPR, ctx->f32, SI_PARAM_ALPHA_REF); /* Declare input VGPRs. */ diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 3cc49caf214..ef4472ba994 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -136,6 +136,7 @@ #include "util/u_queue.h" #include "ac_binary.h" +#include "ac_llvm_build.h" #include "si_state.h" struct nir_shader; @@ -150,18 +151,28 @@ struct nir_shader; /* SGPR user data indices */ enum { SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */ +#if !HAVE_32BIT_POINTERS SI_SGPR_RW_BUFFERS_HI, +#endif SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, +#if !HAVE_32BIT_POINTERS SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI, +#endif SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */ +#if !HAVE_32BIT_POINTERS SI_SGPR_CONST_AND_SHADER_BUFFERS_HI, +#endif SI_SGPR_SAMPLERS_AND_IMAGES, +#if !HAVE_32BIT_POINTERS SI_SGPR_SAMPLERS_AND_IMAGES_HI, +#endif SI_NUM_RESOURCE_SGPRS, /* all VS variants */ SI_SGPR_VERTEX_BUFFERS = SI_NUM_RESOURCE_SGPRS, +#if !HAVE_32BIT_POINTERS SI_SGPR_VERTEX_BUFFERS_HI, +#endif SI_SGPR_BASE_VERTEX, SI_SGPR_START_INSTANCE, SI_SGPR_DRAWID, @@ -190,23 +201,33 @@ enum { GFX9_SGPR_TCS_OUT_LAYOUT, GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K, GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K, +#if !HAVE_32BIT_POINTERS GFX9_SGPR_unused_to_align_the_next_pointer, +#endif GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS, +#if !HAVE_32BIT_POINTERS GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI, +#endif GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES, +#if !HAVE_32BIT_POINTERS GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES_HI, +#endif GFX9_TCS_NUM_USER_SGPR, /* GFX9: Merged ES-GS (VS-GS or TES-GS). */ GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR, +#if !HAVE_32BIT_POINTERS GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI, +#endif GFX9_SGPR_GS_SAMPLERS_AND_IMAGES, +#if !HAVE_32BIT_POINTERS GFX9_SGPR_GS_SAMPLERS_AND_IMAGES_HI, +#endif GFX9_GS_NUM_USER_SGPR, /* GS limits */ GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS, - SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS_HI + 1, + SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS + (HAVE_32BIT_POINTERS ? 1 : 2), /* PS only */ SI_SGPR_ALPHA_REF = SI_NUM_RESOURCE_SGPRS, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 3dd151bca82..9e2a0ebbb55 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -140,7 +140,7 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); list = LLVMBuildPointerCast(builder, list, - ac_array_in_const_addr_space(ctx->v4i32), ""); + ac_array_in_const32_addr_space(ctx->v4i32), ""); } else { assert(desc_type == AC_DESC_IMAGE); } @@ -1107,7 +1107,7 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); list = LLVMBuildPointerCast(builder, list, - ac_array_in_const_addr_space(ctx->v4i32), ""); + ac_array_in_const32_addr_space(ctx->v4i32), ""); break; case AC_DESC_FMASK: /* The FMASK is at [8:15]. */ @@ -1119,7 +1119,7 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), ""); list = LLVMBuildPointerCast(builder, list, - ac_array_in_const_addr_space(ctx->v4i32), ""); + ac_array_in_const32_addr_space(ctx->v4i32), ""); break; }