From: Marek Olšák Date: Sat, 3 Feb 2018 02:19:25 +0000 (+0100) Subject: radeonsi: move tess ring address into TCS_OUT_LAYOUT, removes 2 TCS user SGPRs X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2d03c4cac8dfac5cd2ba2f420a2336d3bf516860;p=mesa.git radeonsi: move tess ring address into TCS_OUT_LAYOUT, removes 2 TCS user SGPRs TCS_OUT_LAYOUT has 13 unused bits. That's enough for a 32-bit address aligned to 512KB. Hey, it's a 13-bit pointer! Reviewed-by: Nicolai Hähnle --- diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8e975a597c6..c88ea87d8ad 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1175,28 +1175,46 @@ static void lds_store(struct si_shader_context *ctx, ac_lds_store(&ctx->ac, dw_addr, value); } -static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx, - unsigned param) +enum si_tess_ring { + TCS_FACTOR_RING, + TESS_OFFCHIP_RING_TCS, + TESS_OFFCHIP_RING_TES, +}; + +static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, + enum si_tess_ring ring) { LLVMBuilderRef builder = ctx->ac.builder; - + unsigned param = ring == TESS_OFFCHIP_RING_TES ? ctx->param_tes_offchip_addr : + ctx->param_tcs_out_lds_layout; LLVMValueRef addr = LLVMGetParam(ctx->main_fn, param); - addr = LLVMBuildZExt(builder, addr, ctx->i64, ""); - addr = LLVMBuildShl(builder, addr, LLVMConstInt(ctx->i64, 16, 0), ""); - uint64_t desc2 = 0xffffffff; - uint64_t desc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); - LLVMValueRef hi = LLVMConstInt(ctx->i64, desc2 | (desc3 << 32), 0); + /* TCS only receives high 13 bits of the address. */ + if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) { + addr = LLVMBuildAnd(builder, addr, + LLVMConstInt(ctx->i32, 0xfff80000, 0), ""); + } + + if (ring == TCS_FACTOR_RING) { + unsigned tf_offset = ctx->screen->tess_offchip_ring_size; + addr = LLVMBuildAdd(builder, addr, + LLVMConstInt(ctx->i32, tf_offset, 0), ""); + } - LLVMValueRef desc = LLVMGetUndef(LLVMVectorType(ctx->i64, 2)); - desc = LLVMBuildInsertElement(builder, desc, addr, ctx->i32_0, ""); - desc = LLVMBuildInsertElement(builder, desc, hi, ctx->i32_1, ""); - return LLVMBuildBitCast(builder, desc, ctx->v4i32, ""); + LLVMValueRef desc[4]; + desc[0] = addr; + desc[1] = LLVMConstInt(ctx->i32, + S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); + desc[2] = LLVMConstInt(ctx->i32, 0xffffffff, 0); + desc[3] = LLVMConstInt(ctx->i32, + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0); + + return ac_build_gather_values(&ctx->ac, desc, 4); } static LLVMValueRef fetch_input_tcs( @@ -1307,7 +1325,7 @@ static LLVMValueRef fetch_input_tes( struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef buffer, base, addr; - buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k); + buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TES); base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset); addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg); @@ -1335,7 +1353,7 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, driver_location = driver_location / 4; - buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k); + buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TES); base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset); @@ -1416,7 +1434,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, } } - buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k); + buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset); buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL); @@ -1531,7 +1549,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, } } - buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k); + buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset); @@ -1978,7 +1996,7 @@ static LLVMValueRef load_tess_level(struct si_shader_context *ctx, int param = si_shader_io_get_unique_index_patch(semantic_name, 0); - buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k); + buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TES); base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset); addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL, @@ -3002,7 +3020,7 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) uint64_t inputs; invocation_id = unpack_llvm_param(ctx, ctx->abi.tcs_rel_ids, 8, 5); - buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k); + buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); buffer_offset = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset); lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx); @@ -3136,7 +3154,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, vec1 = lp_build_gather_values(&ctx->gallivm, out+4, stride - 4); /* Get the buffer. */ - buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_factor_addr_base64k); + buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING); /* Get the offset. */ tf_base = LLVMGetParam(ctx->main_fn, @@ -3176,7 +3194,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, LLVMValueRef tf_inner_offset; unsigned param_outer, param_inner; - buf = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k); + buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset); param_outer = si_shader_io_get_unique_index_patch( @@ -3295,21 +3313,17 @@ static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, if (ctx->screen->info.chip_class >= GFX9) { ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); - ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k, - 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K); - ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k, - 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K); + ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout, + 8 + GFX9_SGPR_TCS_OUT_LAYOUT); /* Tess offchip and tess factor offsets are at the beginning. */ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4); - vgpr = 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K + 1; + vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1; } else { ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT); - ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k, - GFX6_SGPR_TCS_OFFCHIP_ADDR_BASE64K); - ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k, - GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K); + ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout, + GFX6_SGPR_TCS_OUT_LAYOUT); /* Tess offchip and tess factor offsets are after user SGPRs. */ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, GFX6_TCS_NUM_USER_SGPR); @@ -3379,10 +3393,6 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) 8 + GFX9_SGPR_TCS_OUT_OFFSETS); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT); - ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k, - 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K); - ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k, - 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K); unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR; ret = LLVMBuildInsertValue(ctx->ac.builder, ret, @@ -4675,8 +4685,6 @@ static void create_function(struct si_shader_context *ctx) ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); - ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); - ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4722,8 +4730,6 @@ static void create_function(struct si_shader_context *ctx) ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); - ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); - ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* VGPRs (first TCS, then VS) */ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_patch_id); @@ -4745,7 +4751,7 @@ static void create_function(struct si_shader_context *ctx) * param_tcs_offchip_layout, and param_rw_buffers * should be passed to the epilog. */ - for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; i++) + for (i = 0; i <= 8 + GFX9_SGPR_TCS_OUT_LAYOUT; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 11; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ @@ -4779,7 +4785,7 @@ static void create_function(struct si_shader_context *ctx) /* TESS_EVAL (and also GEOMETRY): * Declare as many input SGPRs as the VS has. */ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); - ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ if (!HAVE_32BIT_POINTERS) @@ -4820,7 +4826,7 @@ static void create_function(struct si_shader_context *ctx) declare_global_desc_pointers(ctx, &fninfo); declare_per_stage_desc_pointers(ctx, &fninfo, true); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); - ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32); if (shader->key.as_es) { ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -7318,9 +7324,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); - add_arg(&fninfo, ARG_SGPR, ctx->i32); - ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); - ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); } else { add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); @@ -7328,10 +7332,8 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); + ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); - add_arg(&fninfo, ARG_SGPR, ctx->i32); - ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); - ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index e0d6f701483..1b1f650f869 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -183,7 +183,7 @@ enum { /* TES */ SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, - SI_SGPR_TES_OFFCHIP_ADDR_BASE64K, + SI_SGPR_TES_OFFCHIP_ADDR, SI_TES_NUM_USER_SGPR, /* GFX6-8: TCS only */ @@ -191,8 +191,6 @@ enum { GFX6_SGPR_TCS_OUT_OFFSETS, GFX6_SGPR_TCS_OUT_LAYOUT, GFX6_SGPR_TCS_IN_LAYOUT, - GFX6_SGPR_TCS_OFFCHIP_ADDR_BASE64K, - GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K, GFX6_TCS_NUM_USER_SGPR, /* GFX9: Merged shaders. */ @@ -211,8 +209,6 @@ enum { GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR, GFX9_SGPR_TCS_OUT_OFFSETS, GFX9_SGPR_TCS_OUT_LAYOUT, - GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K, - GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K, GFX9_TCS_NUM_USER_SGPR, /* GS limits */ diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 40947ffc079..d33d3a65778 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -163,14 +163,14 @@ struct si_shader_context { * [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4 * max = 32*32*4 + 32*4 * [13:18] = gl_PatchVerticesIn, max = 32 + * [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers */ int param_tcs_out_lds_layout; - int param_tcs_offchip_addr_base64k; - int param_tcs_factor_addr_base64k; int param_tcs_offchip_offset; int param_tcs_factor_offset; /* API TES */ + int param_tes_offchip_addr; int param_tes_u; int param_tes_v; int param_tes_rel_patch_id; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 3881e3f2a6a..ad470fd9aec 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -231,10 +231,14 @@ static void si_emit_derived_tess_state(struct si_context *sctx, assert(num_tcs_input_cp <= 32); assert(num_tcs_output_cp <= 32); + uint64_t ring_va = r600_resource(sctx->tess_rings)->gpu_address; + assert((ring_va & u_bit_consecutive(0, 19)) == 0); + tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) | S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4); tcs_out_layout = (output_patch_size / 4) | - (num_tcs_input_cp << 13); + (num_tcs_input_cp << 13) | + ring_va; tcs_out_offsets = (output_patch0_offset / 16) | ((perpatch_output_offset / 16) << 16); offchip_layout = *num_patches | @@ -296,7 +300,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Set userdata SGPRs for TES. */ radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2); radeon_emit(cs, offchip_layout); - radeon_emit(cs, r600_resource(sctx->tess_rings)->gpu_address >> 16); + radeon_emit(cs, ring_va); ls_hs_config = S_028B58_NUM_PATCHES(*num_patches) | S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) | diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 701f7aa575f..ad38a0f31b6 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2972,29 +2972,26 @@ static void si_init_tess_factor_ring(struct si_context *sctx) { assert(!sctx->tess_rings); - /* Use 64K alignment for both rings, so that we can pass the address - * to shaders as one SGPR containing bits [16:47]. + /* The address must be aligned to 2^19, because the shader only + * receives the high 13 bits. */ sctx->tess_rings = si_aligned_buffer_create(sctx->b.b.screen, - R600_RESOURCE_FLAG_UNMAPPABLE, + R600_RESOURCE_FLAG_32BIT, PIPE_USAGE_DEFAULT, - align(sctx->screen->tess_offchip_ring_size, - 64 * 1024) + + sctx->screen->tess_offchip_ring_size + sctx->screen->tess_factor_ring_size, - 64 * 1024); + 1 << 19); if (!sctx->tess_rings) return; si_init_config_add_vgt_flush(sctx); - uint64_t offchip_va = r600_resource(sctx->tess_rings)->gpu_address; - assert((offchip_va & 0xffff) == 0); - uint64_t factor_va = offchip_va + - align(sctx->screen->tess_offchip_ring_size, 64 * 1024); - si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tess_rings), RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS); + uint64_t factor_va = r600_resource(sctx->tess_rings)->gpu_address + + sctx->screen->tess_offchip_ring_size; + /* Append these registers to the init config state. */ if (sctx->b.chip_class >= CIK) { si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE, @@ -3015,26 +3012,6 @@ static void si_init_tess_factor_ring(struct si_context *sctx) sctx->screen->vgt_hs_offchip_param); } - if (sctx->b.chip_class >= GFX9) { - si_pm4_set_reg(sctx->init_config, - R_00B430_SPI_SHADER_USER_DATA_LS_0 + - GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K * 4, - offchip_va >> 16); - si_pm4_set_reg(sctx->init_config, - R_00B430_SPI_SHADER_USER_DATA_LS_0 + - GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K * 4, - factor_va >> 16); - } else { - si_pm4_set_reg(sctx->init_config, - R_00B430_SPI_SHADER_USER_DATA_HS_0 + - GFX6_SGPR_TCS_OFFCHIP_ADDR_BASE64K * 4, - offchip_va >> 16); - si_pm4_set_reg(sctx->init_config, - R_00B430_SPI_SHADER_USER_DATA_HS_0 + - GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K * 4, - factor_va >> 16); - } - /* Flush the context to re-emit the init_config state. * This is done only once in a lifetime of a context. */