radeonsi/gfx9: rework the gfx9 scissor bug workaround (v2)

author Marek Olšák <marek.olsak@amd.com>
Thu, 18 Apr 2019 19:19:19 +0000 (15:19 -0400)

committer Marek Olšák <marek.olsak@amd.com>
Thu, 25 Apr 2019 15:49:38 +0000 (11:49 -0400)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c

index 07642246ab65e0081bc65fcc60003e2604087f47..aaf5138a3a25d3a4242779a7ea1e64bb0c8040b2 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1097,6 +1097,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
  #include "si_debug_options.h"
         }
  
+       sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 ||
+                                       sscreen->info.family == CHIP_RAVEN;
         sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 &&
                                             sscreen->info.family <= CHIP_POLARIS12) ||
                                            sscreen->info.family == CHIP_VEGA10 ||
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h

index 7fc0319973b10d3e9e2b9a8af1ea44862d283d29..1d26ca902192b0ce9ca963d6e03425b1b3462ddf 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -468,6 +468,7 @@ struct si_screen {
         bool                            has_out_of_order_rast;
         bool                            assume_no_z_fights;
         bool                            commutative_blend_add;
+       bool                            has_gfx9_scissor_bug;
         bool                            has_msaa_sample_loc_bug;
         bool                            has_ls_vgpr_init_bug;
         bool                            has_dcc_constant_encode;
@@ -1075,7 +1076,7 @@ struct si_context {
         unsigned                        num_resident_handles;
         uint64_t                        num_alloc_tex_transfer_bytes;
         unsigned                        last_tex_ps_draw_ratio; /* for query */
-       unsigned                        context_roll_counter;
+       unsigned                        context_roll;
  
         /* Queries. */
         /* Maintain the list of active queries for pausing between IBs. */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c

index 757c17f7df8ccf1e961627052a973e7155d244dc..bc7e777ad73794f06fb0291ed8ac10bfeea90978 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -256,7 +256,7 @@ static void si_emit_cb_render_state(struct si_context *sctx)
                                             sx_blend_opt_control);
         }
         if (initial_cdw != cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  /*
@@ -793,7 +793,7 @@ static void si_emit_clip_regs(struct si_context *sctx)
                 S_028810_CLIP_DISABLE(window_space));
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  /*
@@ -1455,7 +1455,7 @@ static void si_emit_db_render_state(struct si_context *sctx)
                                    SI_TRACKED_DB_SHADER_CONTROL, db_shader_control);
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  /*
@@ -3544,7 +3544,7 @@ static void si_emit_msaa_config(struct si_context *sctx)
                                    SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1);
  
         if (initial_cdw != cs->current.cdw) {
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  
                 /* GFX9: Flush DFSM when the AA mode changes. */
                 if (sctx->screen->dfsm_allowed) {
diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c

index 3516e561282ef3c8232cee63629b14a6aa756a0c..5c6c2e69b90955d1de22c240556bfea70ed99579 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state_binning.c
+++ b/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -321,7 +321,7 @@ static void si_emit_dpbb_disable(struct si_context *sctx)
                                    S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
                                    S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  void si_emit_dpbb_state(struct si_context *sctx)
@@ -443,5 +443,5 @@ void si_emit_dpbb_state(struct si_context *sctx)
                                    S_028060_PUNCHOUT_MODE(punchout_mode) |
                                    S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c

index 80e1bc4b47543f70bf5e4e3f05fff4c27df2f1c1..4b60679484f766cab5349c929425c33875dee996 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -66,7 +66,7 @@ static unsigned si_conv_pipe_prim(unsigned mode)
   * The information about LDS and other non-compile-time parameters is then
   * written to userdata SGPRs.
   */
-static bool si_emit_derived_tess_state(struct si_context *sctx,
+static void si_emit_derived_tess_state(struct si_context *sctx,
                                        const struct pipe_draw_info *info,
                                        unsigned *num_patches)
  {
@@ -110,7 +110,7 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
             (!has_primid_instancing_bug ||
              (sctx->last_tess_uses_primid == tess_uses_primid))) {
                 *num_patches = sctx->last_num_patches;
-               return false;
+               return;
         }
  
         sctx->last_ls = ls_current;
@@ -305,9 +305,8 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
                                                ls_hs_config);
                 }
                 sctx->last_ls_hs_config = ls_hs_config;
-               return true; /* true if the context rolls */
+               sctx->context_roll = true;
         }
-       return false;
  }
  
  static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info)
@@ -541,7 +540,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
  }
  
  /* rast_prim is the primitive type after GS. */
-static bool si_emit_rasterizer_prim_state(struct si_context *sctx)
+static void si_emit_rasterizer_prim_state(struct si_context *sctx)
  {
         struct radeon_cmdbuf *cs = sctx->gfx_cs;
         enum pipe_prim_type rast_prim = sctx->current_rast_prim;
@@ -549,11 +548,11 @@ static bool si_emit_rasterizer_prim_state(struct si_context *sctx)
  
         /* Skip this if not rendering lines. */
         if (!util_prim_is_lines(rast_prim))
-               return false;
+               return;
  
         if (rast_prim == sctx->last_rast_prim &&
             rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
-               return false;
+               return;
  
         /* For lines, reset the stipple pattern at each primitive. Otherwise,
          * reset the stipple pattern at each packet (line strips, line loops).
@@ -564,7 +563,7 @@ static bool si_emit_rasterizer_prim_state(struct si_context *sctx)
  
         sctx->last_rast_prim = rast_prim;
         sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
-       return true; /* true if the context rolls */
+       sctx->context_roll = true;
  }
  
  static void si_emit_vs_state(struct si_context *sctx,
@@ -659,6 +658,7 @@ static void si_emit_draw_registers(struct si_context *sctx,
                 radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
                                        info->restart_index);
                 sctx->last_restart_index = info->restart_index;
+               sctx->context_roll = true;
         }
  }
  
@@ -886,6 +886,11 @@ static void si_emit_surface_sync(struct si_context *sctx,
                 radeon_emit(cs, 0);               /* CP_COHER_BASE */
                 radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
         }
+
+       /* ACQUIRE_MEM has an implicit context roll if the current context
+        * is busy. */
+       if (sctx->has_graphics)
+               sctx->context_roll = true;
  }
  
  void si_emit_cache_flush(struct si_context *sctx)
@@ -1213,26 +1218,10 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
                                unsigned skip_atom_mask)
  {
         unsigned num_patches = 0;
-       /* Vega10/Raven scissor bug workaround. When any context register is
-        * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
-        * registers must be written too.
-        */
-       bool handle_scissor_bug = (sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
-                                 !si_is_atom_dirty(sctx, &sctx->atoms.s.scissors);
-       bool context_roll = false; /* set correctly for GFX9 only */
  
-       context_roll |= si_emit_rasterizer_prim_state(sctx);
+       si_emit_rasterizer_prim_state(sctx);
         if (sctx->tes_shader.cso)
-               context_roll |= si_emit_derived_tess_state(sctx, info, &num_patches);
-
-       if (handle_scissor_bug &&
-           (info->count_from_stream_output ||
-            sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
-            sctx->dirty_states & si_states_that_always_roll_context() ||
-            si_prim_restart_index_changed(sctx, info)))
-               context_roll = true;
-
-       sctx->context_roll_counter = 0;
+               si_emit_derived_tess_state(sctx, info, &num_patches);
  
         /* Emit state atoms. */
         unsigned mask = sctx->dirty_atoms & ~skip_atom_mask;
@@ -1255,12 +1244,6 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
         }
         sctx->dirty_states = 0;
  
-       if (handle_scissor_bug &&
-           (context_roll || sctx->context_roll_counter)) {
-               sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
-               sctx->atoms.s.scissors.emit(sctx);
-       }
-
         /* Emit draw states. */
         si_emit_vs_state(sctx, info);
         si_emit_draw_registers(sctx, info, num_patches);
@@ -1462,6 +1445,22 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
         if (!si_upload_vertex_buffer_descriptors(sctx))
                 goto return_cleanup;
  
+       /* Vega10/Raven scissor bug workaround. When any context register is
+        * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
+        * registers must be written too.
+        */
+       bool has_gfx9_scissor_bug = sctx->screen->has_gfx9_scissor_bug;
+       unsigned masked_atoms = 0;
+
+       if (has_gfx9_scissor_bug) {
+               masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.scissors);
+
+               if (info->count_from_stream_output ||
+                   sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
+                   sctx->dirty_states & si_states_that_always_roll_context())
+                       sctx->context_roll = true;
+       }
+
         /* Use optimal packet order based on whether we need to sync the pipeline. */
         if (unlikely(sctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
                                       SI_CONTEXT_FLUSH_AND_INV_DB |
@@ -1472,8 +1471,6 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
                  * Then draw and prefetch at the end. This ensures that the time
                  * the CUs are idle is very short.
                  */
-               unsigned masked_atoms = 0;
-
                 if (unlikely(sctx->flags & SI_CONTEXT_FLUSH_FOR_RENDER_COND))
                         masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
  
@@ -1487,6 +1484,13 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
  
                 if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond))
                         sctx->atoms.s.render_cond.emit(sctx);
+
+               if (has_gfx9_scissor_bug &&
+                   (sctx->context_roll ||
+                    si_is_atom_dirty(sctx, &sctx->atoms.s.scissors))) {
+                       sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+                       sctx->atoms.s.scissors.emit(sctx);
+               }
                 sctx->dirty_atoms = 0;
  
                 si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
@@ -1511,7 +1515,16 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
                 if (!si_upload_graphics_shader_descriptors(sctx))
                         return;
  
-               si_emit_all_states(sctx, info, 0);
+               si_emit_all_states(sctx, info, masked_atoms);
+
+               if (has_gfx9_scissor_bug &&
+                   (sctx->context_roll ||
+                    si_is_atom_dirty(sctx, &sctx->atoms.s.scissors))) {
+                       sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+                       sctx->atoms.s.scissors.emit(sctx);
+               }
+               sctx->dirty_atoms = 0;
+
                 si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
  
                 /* Prefetch the remaining shaders after the draw has been
@@ -1520,6 +1533,9 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
                         cik_emit_prefetch_L2(sctx, false);
         }
  
+       /* Clear the context roll flag after the draw call. */
+       sctx->context_roll = false;
+
         if (unlikely(sctx->current_saved_cs)) {
                 si_trace_emit(sctx);
                 si_log_draw_state(sctx, sctx->log);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c

index f57e773090561c116ccfcb7904db88e8c7081b8d..55df95477d39b9a026023d54edb2094d3f08df2b 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -576,7 +576,7 @@ static void si_emit_shader_es(struct si_context *sctx)
                                            shader->vgt_vertex_reuse_block_cntl);
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
@@ -825,7 +825,7 @@ static void si_emit_shader_gs(struct si_context *sctx)
         }
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
@@ -1002,7 +1002,7 @@ static void si_emit_shader_vs(struct si_context *sctx)
                                            shader->vgt_vertex_reuse_block_cntl);
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  /**
@@ -1194,7 +1194,7 @@ static void si_emit_shader_ps(struct si_context *sctx)
                                    shader->ctx_reg.ps.cb_shader_mask);
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  static void si_shader_ps(struct si_shader *shader)
@@ -2877,7 +2877,7 @@ static void si_emit_spi_map(struct si_context *sctx)
                                     sctx->tracked_regs.spi_ps_input_cntl, num_interp);
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  /**
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c

index 2bf6862c89ba9b2d2ba93ec5f0c14d07f4228550..2a0a4bef9a2d26dfbf87c3f04442ed9a5ab78dbe 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -303,6 +303,7 @@ void si_emit_streamout_end(struct si_context *sctx)
                  * buffer bound. This ensures that the primitives-emitted query
                  * won't increment. */
                 radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+               sctx->context_roll = true;
  
                 t[i]->buf_filled_size_valid = true;
         }
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c

index f988da4520b8dc2d606aef3682a419026ea92b83..6f348a9b58d5ef76c5efb672ca56f06ca0e468b0 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -283,7 +283,7 @@ static void si_emit_guardband(struct si_context *ctx)
                                    S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH +
                                                        vp_as_scissor.quant_mode));
         if (initial_cdw != ctx->gfx_cs->current.cdw)
-               ctx->context_roll_counter++;
+               ctx->context_roll = true;
  }
  
  static void si_emit_scissors(struct si_context *ctx)
author	Marek Olšák <marek.olsak@amd.com>
	Thu, 18 Apr 2019 19:19:19 +0000 (15:19 -0400)
committer	Marek Olšák <marek.olsak@amd.com>
	Thu, 25 Apr 2019 15:49:38 +0000 (11:49 -0400)
src/gallium/drivers/radeonsi/si_pipe.c		patch | blob | history
src/gallium/drivers/radeonsi/si_pipe.h		patch | blob | history
src/gallium/drivers/radeonsi/si_state.c		patch | blob | history
src/gallium/drivers/radeonsi/si_state_binning.c		patch | blob | history
src/gallium/drivers/radeonsi/si_state_draw.c		patch | blob | history
src/gallium/drivers/radeonsi/si_state_shaders.c		patch | blob | history
src/gallium/drivers/radeonsi/si_state_streamout.c		patch | blob | history
src/gallium/drivers/radeonsi/si_state_viewport.c		patch | blob | history