/* SHADER STATES */
static void si_set_tesseval_regs(struct si_screen *sscreen,
- struct si_shader *shader,
+ struct si_shader_selector *tes,
struct si_pm4_state *pm4)
{
- struct tgsi_shader_info *info = &shader->selector->info;
+ struct tgsi_shader_info *info = &tes->info;
unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
* VS as ES | ES -> GS -> VS | 30
* TES as VS | LS -> HS -> VS | 14 or 30
* TES as ES | LS -> HS -> ES -> GS -> VS | 14 or 30
+ *
+ * If "shader" is NULL, it's assumed it's not LS or GS copy shader.
*/
static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen,
+ struct si_shader_selector *sel,
struct si_shader *shader,
struct si_pm4_state *pm4)
{
- unsigned type = shader->selector->type;
+ unsigned type = sel->type;
if (sscreen->b.family < CHIP_POLARIS10)
return;
/* VS as VS, or VS as ES: */
if ((type == PIPE_SHADER_VERTEX &&
- !shader->key.as_ls &&
- !shader->is_gs_copy_shader) ||
+ (!shader ||
+ (!shader->key.as_ls && !shader->is_gs_copy_shader))) ||
/* TES as VS, or TES as ES: */
type == PIPE_SHADER_TESS_EVAL) {
unsigned vtx_reuse_depth = 30;
if (type == PIPE_SHADER_TESS_EVAL &&
- shader->selector->info.properties[TGSI_PROPERTY_TES_SPACING] ==
+ sel->info.properties[TGSI_PROPERTY_TES_SPACING] ==
PIPE_TESS_SPACING_FRACTIONAL_ODD)
vtx_reuse_depth = 14;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
/* We need at least 2 components for LS.
- * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
- vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
+ * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
+ * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
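+ * (A VGPR_COMP_CNT value of n makes the hardware load input VGPRs 0..n,
+ * so 2 loads VertexID, RelAutoindex and InstanceID / StepRate0.)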
+ */
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >> 40);
/* We need at least 2 components for LS.
- * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
- ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
+ * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
+ * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
+ */
+ ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
+
+ if (shader->config.scratch_bytes_per_wave) {
+ fprintf(stderr, "HS: scratch buffer unsupported");
+ abort();
+ }
shader->config.rsrc2 =
S_00B42C_USER_SGPR(GFX9_TCS_NUM_USER_SGPR) |
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0;
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0;
num_user_sgprs = SI_VS_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
- vgpr_comp_cnt = 3; /* all components are needed for TES */
+ vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2;
num_user_sgprs = SI_TES_NUM_USER_SGPR;
} else
unreachable("invalid shader selector type");
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
- si_set_tesseval_regs(sscreen, shader, pm4);
+ si_set_tesseval_regs(sscreen, shader->selector, pm4);
- polaris_set_vgt_vertex_reuse(sscreen, shader, pm4);
+ polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader, pm4);
}
/**
struct gfx9_gs_info gs_info;
if (es_type == PIPE_SHADER_VERTEX)
- es_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0;
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
+ es_vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0;
else if (es_type == PIPE_SHADER_TESS_EVAL)
- es_vgpr_comp_cnt = 3; /* all components are needed for TES */
+ es_vgpr_comp_cnt = shader->key.part.gs.es->info.uses_primid ? 3 : 2;
else
unreachable("invalid shader selector type");
S_028A94_MAX_PRIMS_PER_SUBGROUP(gs_info.max_prims_per_subgroup));
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
shader->key.part.gs.es->esgs_itemsize / 4);
+
+ if (es_type == PIPE_SHADER_TESS_EVAL)
+ si_set_tesseval_regs(sscreen, shader->key.part.gs.es, pm4);
+
+ polaris_set_vgt_vertex_reuse(sscreen, shader->key.part.gs.es,
+ NULL, pm4);
+
+ if (shader->config.scratch_bytes_per_wave) {
+ fprintf(stderr, "GS: scratch buffer unsupported");
+ abort();
+ }
} else {
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
unsigned oc_lds_en;
unsigned window_space =
shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
- bool enable_prim_id = si_vs_exports_prim_id(shader);
+ bool enable_prim_id = shader->key.mono.vs_export_prim_id;
pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
* not sent again.
*/
if (!gs) {
- si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
- S_028A40_MODE(enable_prim_id ? V_028A40_GS_SCENARIO_A : 0));
+ unsigned mode = 0;
+
+ /* PrimID needs GS scenario A.
+ * GFX9 also needs it when ViewportIndex is enabled.
+ */
+ if (enable_prim_id ||
+ (sscreen->b.chip_class >= GFX9 &&
+ shader->selector->info.writes_viewport_index))
+ mode = V_028A40_GS_SCENARIO_A;
+
+ si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, S_028A40_MODE(mode));
si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, enable_prim_id);
} else {
si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(gs));
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : (enable_prim_id ? 2 : 0);
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID)
+ * If PrimID is disabled, InstanceID / StepRate1 is loaded instead.
+ * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
+ */
+ vgpr_comp_cnt = enable_prim_id ? 2 : (shader->info.uses_instanceid ? 1 : 0);
num_user_sgprs = SI_VS_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
- vgpr_comp_cnt = 3; /* all components are needed for TES */
+ vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2;
num_user_sgprs = SI_TES_NUM_USER_SGPR;
} else
unreachable("invalid shader selector type");
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
- si_set_tesseval_regs(sscreen, shader, pm4);
+ si_set_tesseval_regs(sscreen, shader->selector, pm4);
- polaris_set_vgt_vertex_reuse(sscreen, shader, pm4);
+ polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader, pm4);
}
static unsigned si_get_ps_num_interp(struct si_shader *ps)
si_shader_selector_key_hw_vs(sctx, sel, key);
if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
- key->part.vs.epilog.export_prim_id = 1;
+ key->mono.vs_export_prim_id = 1;
}
break;
case PIPE_SHADER_TESS_CTRL:
si_shader_selector_key_hw_vs(sctx, sel, key);
if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
- key->part.tes.epilog.export_prim_id = 1;
+ key->mono.vs_export_prim_id = 1;
}
break;
case PIPE_SHADER_GEOMETRY:
key, &key->part.gs.vs_prolog);
key->part.gs.es = sctx->vs_shader.cso;
}
+
+ /* Merged ES-GS can have unbalanced wave usage.
+ *
+ * ES threads are per-vertex, while GS threads are
+ * per-primitive. So without any amplification, there
+ * are fewer GS threads than ES threads, which can result
+ * in empty (no-op) GS waves. With too much amplification,
+ * there are more GS threads than ES threads, which
+ * can result in empty (no-op) ES waves.
+ *
+ * Non-monolithic shader parts set EXEC at the beginning and
+ * don't jump to the end if EXEC is 0.
+ *
+ * Monolithic shaders use conditional blocks, so they can jump
+ * over and skip empty waves of ES or GS. So set this to always
+ * use the optimized variants, which are monolithic.
+ */
+ key->opt.prefer_mono = 1;
}
key->part.gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix;
break;
return true;
}
+static void si_destroy_shader_selector(struct si_context *sctx,
+ struct si_shader_selector *sel);
+
+static void si_shader_selector_reference(struct si_context *sctx,
+ struct si_shader_selector **dst,
+ struct si_shader_selector *src)
+{
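+ /* pipe_reference() updates both reference counts and returns true
+ * when the old selector's count drops to zero, i.e. it must be
+ * destroyed. */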
+ if (pipe_reference(&(*dst)->reference, &src->reference))
+ si_destroy_shader_selector(sctx, *dst);
+
+ *dst = src;
+}
+
/* Select the hw shader variant depending on the current state. */
static int si_shader_select_with_key(struct si_screen *sscreen,
struct si_shader_ctx_state *state,
int thread_index)
{
struct si_shader_selector *sel = state->cso;
+ struct si_shader_selector *previous_stage_sel = NULL;
struct si_shader *current = state->current;
struct si_shader *iter, *shader = NULL;
shader->key = *key;
shader->compiler_ctx_state = *compiler_state;
+ /* If this is a merged shader, get the first shader's selector. */
+ if (sscreen->b.chip_class >= GFX9) {
+ if (sel->type == PIPE_SHADER_TESS_CTRL)
+ previous_stage_sel = key->part.tcs.ls;
+ else if (sel->type == PIPE_SHADER_GEOMETRY)
+ previous_stage_sel = key->part.gs.es;
+ }
+
/* Compile the main shader part if it doesn't exist. This can happen
* if the initial guess was wrong. */
bool is_pure_monolithic =
* For merged shaders, check that the starting shader's main
* part is present.
*/
- if (sscreen->b.chip_class >= GFX9 &&
- (sel->type == PIPE_SHADER_TESS_CTRL ||
- sel->type == PIPE_SHADER_GEOMETRY)) {
- struct si_shader_selector *shader1 = NULL;
+ if (previous_stage_sel) {
struct si_shader_key shader1_key = zeroed;
- if (sel->type == PIPE_SHADER_TESS_CTRL) {
- shader1 = key->part.tcs.ls;
+ if (sel->type == PIPE_SHADER_TESS_CTRL)
shader1_key.as_ls = 1;
- } else if (sel->type == PIPE_SHADER_GEOMETRY) {
- shader1 = key->part.gs.es;
+ else if (sel->type == PIPE_SHADER_GEOMETRY)
shader1_key.as_es = 1;
- } else
+ else
assert(0);
- ok = si_check_missing_main_part(sscreen, shader1,
+ ok = si_check_missing_main_part(sscreen,
+ previous_stage_sel,
compiler_state, &shader1_key);
} else {
ok = si_check_missing_main_part(sscreen, sel,
}
}
+ /* Keep the reference to the 1st shader of merged shaders, so that
+ * Gallium can't destroy it before we destroy the 2nd shader.
+ *
+ * Set sctx = NULL, because it's unused if we're not releasing
+ * the shader, and we don't have any sctx here.
+ */
+ si_shader_selector_reference(NULL, &shader->previous_stage_sel,
+ previous_stage_sel);
+
/* Monolithic-only shaders don't make a distinction between optimized
* and unoptimized. */
shader->is_monolithic =
if (!sel)
return NULL;
+ pipe_reference_init(&sel->reference, 1);
sel->screen = sscreen;
sel->compiler_ctx_state.tm = sctx->tm;
sel->compiler_ctx_state.debug = sctx->b.debug;
}
}
+ si_shader_selector_reference(sctx, &shader->previous_stage_sel, NULL);
si_shader_destroy(shader);
free(shader);
}
-static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
+static void si_destroy_shader_selector(struct si_context *sctx,
+ struct si_shader_selector *sel)
{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_shader_selector *sel = (struct si_shader_selector *)state;
struct si_shader *p = sel->first_variant, *c;
struct si_shader_ctx_state *current_shader[SI_NUM_SHADERS] = {
[PIPE_SHADER_VERTEX] = &sctx->vs_shader,
free(sel);
}
+static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_shader_selector *sel = (struct si_shader_selector *)state;
+
+ si_shader_selector_reference(sctx, &sel, NULL);
+}
+
static unsigned si_get_ps_input_cntl(struct si_context *sctx,
struct si_shader *vs, unsigned name,
unsigned index, unsigned interpolate)
assert(sctx->scratch_buffer);
- si_shader_apply_scratch_relocs(sctx, shader, &shader->config, scratch_va);
+ si_shader_apply_scratch_relocs(shader, scratch_va);
/* Replace the shader bo with a new bo that has the relocs applied. */
r = si_shader_binary_upload(sctx->screen, shader);
}
assert(!sctx->tf_ring);
+ /* Use 64K alignment for both rings, so that we can pass the address
+ * to shaders as one SGPR containing bits [16:47].
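+ * (With 64K alignment the low 16 bits are zero, so the full address
+ * can be rebuilt in the shader as (uint64_t)sgpr << 16.)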
+ */
sctx->tf_ring = r600_aligned_buffer_create(sctx->b.b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
32768 * sctx->screen->b.info.max_se,
- 256);
+ 64 * 1024);
if (!sctx->tf_ring)
return;
PIPE_USAGE_DEFAULT,
max_offchip_buffers *
sctx->screen->tess_offchip_block_dw_size * 4,
- 256);
+ 64 * 1024);
if (!sctx->tess_offchip_ring)
return;
si_init_config_add_vgt_flush(sctx);
+ uint64_t offchip_va = r600_resource(sctx->tess_offchip_ring)->gpu_address;
+ uint64_t factor_va = r600_resource(sctx->tf_ring)->gpu_address;
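+ /* Both rings were created with 64K alignment above, so bits [0:15]
+ * of their addresses must be zero. */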
+ assert((offchip_va & 0xffff) == 0);
+ assert((factor_va & 0xffff) == 0);
+
+ si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tess_offchip_ring),
+ RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS);
+ si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tf_ring),
+ RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS);
+
/* Append these registers to the init config state. */
if (sctx->b.chip_class >= CIK) {
if (sctx->b.chip_class >= VI)
si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
S_030938_SIZE(sctx->tf_ring->width0 / 4));
si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
- r600_resource(sctx->tf_ring)->gpu_address >> 8);
+ factor_va >> 8);
if (sctx->b.chip_class >= GFX9)
si_pm4_set_reg(sctx->init_config, R_030944_VGT_TF_MEMORY_BASE_HI,
- r600_resource(sctx->tf_ring)->gpu_address >> 40);
+ factor_va >> 40);
si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM,
S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
S_03093C_OFFCHIP_GRANULARITY(offchip_granularity));
si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE,
S_008988_SIZE(sctx->tf_ring->width0 / 4));
si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE,
- r600_resource(sctx->tf_ring)->gpu_address >> 8);
+ factor_va >> 8);
si_pm4_set_reg(sctx->init_config, R_0089B0_VGT_HS_OFFCHIP_PARAM,
S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers));
}
+ if (sctx->b.chip_class >= GFX9) {
+ si_pm4_set_reg(sctx->init_config,
+ R_00B430_SPI_SHADER_USER_DATA_LS_0 +
+ GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K * 4,
+ offchip_va >> 16);
+ si_pm4_set_reg(sctx->init_config,
+ R_00B430_SPI_SHADER_USER_DATA_LS_0 +
+ GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K * 4,
+ factor_va >> 16);
+ } else {
+ si_pm4_set_reg(sctx->init_config,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0 +
+ GFX6_SGPR_TCS_OFFCHIP_ADDR_BASE64K * 4,
+ offchip_va >> 16);
+ si_pm4_set_reg(sctx->init_config,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0 +
+ GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K * 4,
+ factor_va >> 16);
+ }
+
/* Flush the context to re-emit the init_config state.
* This is done only once in a lifetime of a context.
*/
si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
sctx->b.initial_gfx_cs_size = 0; /* force flush */
si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
-
- si_set_ring_buffer(&sctx->b.b, SI_HS_RING_TESS_FACTOR, sctx->tf_ring,
- 0, sctx->tf_ring->width0, false, false, 0, 0, 0);
-
- si_set_ring_buffer(&sctx->b.b, SI_HS_RING_TESS_OFFCHIP,
- sctx->tess_offchip_ring, 0,
- sctx->tess_offchip_ring->width0, false, false, 0, 0, 0);
}
/**