From 498dc676ea7efac9bce490006a4f5b7f81e9e458 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 18 Aug 2014 23:16:08 +0200 Subject: [PATCH] r600g: copy IA_MULTI_VGT_PARAM programming from radeonsi for Cayman MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Alex Deucher Reviewed-by: Michel Dänzer --- src/gallium/drivers/r600/evergreen_state.c | 2 -- src/gallium/drivers/r600/r600_pipe.h | 2 +- src/gallium/drivers/r600/r600_state_common.c | 24 ++++++++++++++++++++ src/gallium/drivers/r600/r600d.h | 11 +++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index e6e9f49e537..841ad0c8d34 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2243,8 +2243,6 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1); - r600_store_context_reg(cb, CM_R_028AA8_IA_MULTI_VGT_PARAM, S_028AA8_SWITCH_ON_EOP(1) | S_028AA8_PARTIAL_VS_WAVE_ON(1) | S_028AA8_PRIMGROUP_SIZE(63)); - r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); r600_store_value(cb, 0x76543210); /* CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 */ r600_store_value(cb, 0xfedcba98); /* CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 */ diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index ee836b771f7..e277269cccb 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -40,7 +40,7 @@ /* the number of CS dwords for flushing and drawing */ #define R600_MAX_FLUSH_CS_DWORDS 16 -#define R600_MAX_DRAW_CS_DWORDS 37 +#define R600_MAX_DRAW_CS_DWORDS 40 #define R600_TRACE_CS_DWORDS 7 #define R600_MAX_USER_CONST_BUFFERS 13 diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index d2f0d174c9f..7594d0ef75a 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1418,6 +1418,30 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info r600_emit_atom(rctx, rctx->atoms[i]); } + if (rctx->b.chip_class == CAYMAN) { + /* Copied from radeonsi. */ + unsigned primgroup_size = 128; /* recommended without a GS */ + bool ia_switch_on_eop = false; + bool partial_vs_wave = false; + + if (rctx->gs_shader) + primgroup_size = 64; /* recommended with a GS */ + + if ((rctx->rasterizer && rctx->rasterizer->pa_sc_line_stipple) || + (rctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) { + ia_switch_on_eop = true; + } + + if (rctx->b.streamout.streamout_enabled || + rctx->b.streamout.prims_gen_query_enabled) + partial_vs_wave = true; + + r600_write_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM, + S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | + S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | + S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1)); + } + /* On R6xx, CULL_FRONT=1 culls all points, lines, and rectangles, * even though it should have no effect on those. */ if (rctx->b.chip_class == R600 && rctx->rasterizer) { diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 8405fbbfa94..17568abd05b 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -3747,6 +3747,17 @@ #define SQ_TEX_INST_SAMPLE_C_G_LB 0x1E #define SQ_TEX_INST_SAMPLE_C_G_LZ 0x1F +#define CM_R_028AA8_IA_MULTI_VGT_PARAM 0x028AA8 +#define S_028AA8_PRIMGROUP_SIZE(x) (((x) & 0xFFFF) << 0) +#define G_028AA8_PRIMGROUP_SIZE(x) (((x) >> 0) & 0xFFFF) +#define C_028AA8_PRIMGROUP_SIZE 0xFFFF0000 +#define S_028AA8_PARTIAL_VS_WAVE_ON(x) (((x) & 0x1) << 16) +#define G_028AA8_PARTIAL_VS_WAVE_ON(x) (((x) >> 16) & 0x1) +#define C_028AA8_PARTIAL_VS_WAVE_ON 0xFFFEFFFF +#define S_028AA8_SWITCH_ON_EOP(x) (((x) & 0x1) << 17) +#define G_028AA8_SWITCH_ON_EOP(x) (((x) >> 17) & 0x1) +#define C_028AA8_SWITCH_ON_EOP 0xFFFDFFFF + /* async DMA packets */ #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ (((t) & 0x1) << 23) | \ -- 2.30.2