From 09d25a9b37eeb34b3475fe486b82e12e904bcb28 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 30 Nov 2015 15:22:06 +1000 Subject: [PATCH] r600/eg: workaround bug with tess shader and dynamic GPRs. When using tessellation on eg/ni chipsets, we must disable dynamic GPRs to work around a hardware bug where the GPU hangs when too many things get queued. This implements something like the r600 code to emit the transition between static and dynamic GPRs, and to statically allocate GPRs when tessellation is enabled. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_compute.c | 6 +- src/gallium/drivers/r600/evergreen_state.c | 226 ++++++++++++++----- src/gallium/drivers/r600/r600_hw_context.c | 2 +- src/gallium/drivers/r600/r600_pipe.h | 10 +- src/gallium/drivers/r600/r600_state_common.c | 7 + 5 files changed, 190 insertions(+), 61 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index a3e198c6fcf..ef6de8c98d1 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -432,6 +432,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, */ r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd); + /* emit config state */ + if (ctx->b.chip_class == EVERGREEN) + r600_emit_atom(ctx, &ctx->config_state.atom); + ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; r600_flush_emit(ctx); @@ -791,7 +795,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) /* Config Registers */ if (ctx->b.chip_class < CAYMAN) - evergreen_init_common_regs(cb, ctx->b.chip_class, ctx->b.family, + evergreen_init_common_regs(ctx, cb, ctx->b.chip_class, ctx->b.family, ctx->screen->b.info.drm_minor); else cayman_init_common_regs(cb, ctx->b.chip_class, ctx->b.family, diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 79cdd7c2cda..229baab8cba 100644 
--- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -869,6 +869,33 @@ evergreen_create_sampler_view(struct pipe_context *ctx, tex->width0, tex->height0, 0); } +static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom) +{ + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; + struct r600_config_state *a = (struct r600_config_state*)atom; + + radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3); + if (a->dyn_gpr_enabled) { + radeon_emit(cs, S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs)); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + } else { + radeon_emit(cs, a->sq_gpr_resource_mgmt_1); + radeon_emit(cs, a->sq_gpr_resource_mgmt_2); + radeon_emit(cs, a->sq_gpr_resource_mgmt_3); + } + radeon_set_config_reg(cs, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (a->dyn_gpr_enabled << 8)); + if (a->dyn_gpr_enabled) { + radeon_set_context_reg(cs, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, + S_028838_PS_GPRS(0x1e) | + S_028838_VS_GPRS(0x1e) | + S_028838_GS_GPRS(0x1e) | + S_028838_ES_GPRS(0x1e) | + S_028838_HS_GPRS(0x1e) | + S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/ + } +} + static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = rctx->b.gfx.cs; @@ -2553,10 +2580,10 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (128 * 4), 0x01000FFF); } -void evergreen_init_common_regs(struct r600_command_buffer *cb, - enum chip_class ctx_chip_class, - enum radeon_family ctx_family, - int ctx_drm_minor) +void evergreen_init_common_regs(struct r600_context *rctx, struct r600_command_buffer *cb, + enum chip_class ctx_chip_class, + enum radeon_family ctx_family, + int ctx_drm_minor) { int ps_prio; int vs_prio; @@ -2567,31 +2594,23 @@ void evergreen_init_common_regs(struct 
r600_command_buffer *cb, int cs_prio; int ls_prio; - int num_ps_gprs; - int num_vs_gprs; - int num_gs_gprs; - int num_es_gprs; - int num_hs_gprs; - int num_ls_gprs; - int num_temp_gprs; - unsigned tmp; ps_prio = 0; vs_prio = 1; gs_prio = 2; es_prio = 3; - hs_prio = 0; - ls_prio = 0; + hs_prio = 3; + ls_prio = 3; cs_prio = 0; - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; + rctx->default_gprs[R600_HW_STAGE_PS] = 93; + rctx->default_gprs[R600_HW_STAGE_VS] = 46; + rctx->r6xx_num_clause_temp_gprs = 4; + rctx->default_gprs[R600_HW_STAGE_GS] = 31; + rctx->default_gprs[R600_HW_STAGE_ES] = 31; + rctx->default_gprs[EG_HW_STAGE_HS] = 23; + rctx->default_gprs[EG_HW_STAGE_LS] = 23; tmp = 0; switch (ctx_family) { @@ -2614,40 +2633,12 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb, tmp |= S_008C00_GS_PRIO(gs_prio); tmp |= S_008C00_ES_PRIO(es_prio); - /* enable dynamic GPR resource management */ - if (ctx_drm_minor >= 7) { - r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2); - r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */ - /* always set temp clauses */ - r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */ - r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2); - r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */ - r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */ - r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8)); - r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, - S_028838_PS_GPRS(0x1e) | - S_028838_VS_GPRS(0x1e) | - S_028838_GS_GPRS(0x1e) | - S_028838_ES_GPRS(0x1e) | - S_028838_HS_GPRS(0x1e) | - S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/ - } else { - r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4); - 
r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */ - - tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs); - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */ - - tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs); - tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); - r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */ + r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 1); + r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */ - tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs); - tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs); - r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */ - } + r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2); + r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */ + r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */ /* The cs checker requires this register to be set. */ r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0); @@ -2694,7 +2685,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx) r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); - evergreen_init_common_regs(cb, rctx->b.chip_class, + evergreen_init_common_regs(rctx, cb, rctx->b.chip_class, rctx->b.family, rctx->screen->b.info.drm_minor); family = rctx->b.family; @@ -3693,7 +3684,11 @@ void evergreen_init_state_functions(struct r600_context *rctx) * or piglit regression). * !!! 
*/ - + if (rctx->b.chip_class == EVERGREEN) { + r600_init_atom(rctx, &rctx->config_state.atom, id++, evergreen_emit_config_state, 11); + if (rctx->screen->b.info.drm_minor >= 7) + rctx->config_state.dyn_gpr_enabled = true; + } r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0); /* shader const */ r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0); @@ -3921,3 +3916,122 @@ void evergreen_set_lds_alloc(struct r600_context *rctx, { radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc); } + +/* on evergreen if you are running tessellation you need to disable dynamic + GPRs to workaround a hardware bug.*/ +bool evergreen_adjust_gprs(struct r600_context *rctx) +{ + unsigned num_gprs[EG_NUM_HW_STAGES]; + unsigned def_gprs[EG_NUM_HW_STAGES]; + unsigned cur_gprs[EG_NUM_HW_STAGES]; + unsigned new_gprs[EG_NUM_HW_STAGES]; + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; + unsigned max_gprs; + unsigned i; + unsigned total_gprs; + unsigned tmp[3]; + bool rework = false, set_default = false, set_dirty = false; + max_gprs = 0; + for (i = 0; i < EG_NUM_HW_STAGES; i++) { + def_gprs[i] = rctx->default_gprs[i]; + max_gprs += def_gprs[i]; + } + max_gprs += def_num_clause_temp_gprs * 2; + + /* if we have no TESS and dyn gpr is enabled then do nothing. 
*/ + if (!rctx->hw_shader_stages[EG_HW_STAGE_HS].shader || rctx->screen->b.info.drm_minor < 7) { + if (rctx->config_state.dyn_gpr_enabled) + return true; + + /* transition back to dyn gpr enabled state */ + rctx->config_state.dyn_gpr_enabled = true; + r600_mark_atom_dirty(rctx, &rctx->config_state.atom); + rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE; + return true; + } + + + /* gather required shader gprs */ + for (i = 0; i < EG_NUM_HW_STAGES; i++) { + if (rctx->hw_shader_stages[i].shader) + num_gprs[i] = rctx->hw_shader_stages[i].shader->shader.bc.ngpr; + else + num_gprs[i] = 0; + } + + cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); + cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); + cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); + cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); + cur_gprs[EG_HW_STAGE_LS] = G_008C0C_NUM_LS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3); + cur_gprs[EG_HW_STAGE_HS] = G_008C0C_NUM_HS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3); + + total_gprs = 0; + for (i = 0; i < EG_NUM_HW_STAGES; i++) { + new_gprs[i] = num_gprs[i]; + total_gprs += num_gprs[i]; + } + + if (total_gprs > (max_gprs - (2 * def_num_clause_temp_gprs))) + return false; + + for (i = 0; i < EG_NUM_HW_STAGES; i++) { + if (new_gprs[i] > cur_gprs[i]) { + rework = true; + break; + } + } + + if (rctx->config_state.dyn_gpr_enabled) { + set_dirty = true; + rctx->config_state.dyn_gpr_enabled = false; + } + + if (rework) { + set_default = true; + for (i = 0; i < EG_NUM_HW_STAGES; i++) { + if (new_gprs[i] > def_gprs[i]) + set_default = false; + } + + if (set_default) { + for (i = 0; i < EG_NUM_HW_STAGES; i++) { + new_gprs[i] = def_gprs[i]; + } + } else { + unsigned ps_value = max_gprs; + + ps_value -= (def_num_clause_temp_gprs * 2); + for (i = R600_HW_STAGE_VS; i < EG_NUM_HW_STAGES; i++) + 
ps_value -= new_gprs[i]; + + new_gprs[R600_HW_STAGE_PS] = ps_value; + } + + tmp[0] = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) | + S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) | + S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs); + + tmp[1] = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) | + S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]); + + tmp[2] = S_008C0C_NUM_HS_GPRS(new_gprs[EG_HW_STAGE_HS]) | + S_008C0C_NUM_LS_GPRS(new_gprs[EG_HW_STAGE_LS]); + + if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp[0] || + rctx->config_state.sq_gpr_resource_mgmt_2 != tmp[1] || + rctx->config_state.sq_gpr_resource_mgmt_3 != tmp[2]) { + rctx->config_state.sq_gpr_resource_mgmt_1 = tmp[0]; + rctx->config_state.sq_gpr_resource_mgmt_2 = tmp[1]; + rctx->config_state.sq_gpr_resource_mgmt_3 = tmp[2]; + set_dirty = true; + } + } + + + if (set_dirty) { + r600_mark_atom_dirty(rctx, &rctx->config_state.atom); + rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE; + } + return true; +} diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index b7845b5c19d..90b99e8a275 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -310,7 +310,7 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->viewport.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; ctx->viewport.atom.num_dw = R600_MAX_VIEWPORTS * 8; r600_mark_atom_dirty(ctx, &ctx->viewport.atom); - if (ctx->b.chip_class < EVERGREEN) { + if (ctx->b.chip_class <= EVERGREEN) { r600_mark_atom_dirty(ctx, &ctx->config_state.atom); } r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index f1651df0565..795fb9a9513 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -38,7 +38,7 @@ #include "tgsi/tgsi_scan.h" -#define R600_NUM_ATOMS 51 +#define R600_NUM_ATOMS 52 #define R600_MAX_VIEWPORTS 16 @@ -206,6 +206,8 
@@ struct r600_config_state { struct r600_atom atom; unsigned sq_gpr_resource_mgmt_1; unsigned sq_gpr_resource_mgmt_2; + unsigned sq_gpr_resource_mgmt_3; + bool dyn_gpr_enabled; }; struct r600_stencil_ref @@ -441,6 +443,7 @@ struct r600_context { boolean has_vertex_cache; boolean keep_tiling_flags; unsigned default_gprs[EG_NUM_HW_STAGES]; + unsigned current_gprs[EG_NUM_HW_STAGES]; unsigned r6xx_num_clause_temp_gprs; /* Miscellaneous state objects. */ @@ -608,7 +611,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, const struct pipe_sampler_view *state, unsigned width0, unsigned height0, unsigned force_level); -void evergreen_init_common_regs(struct r600_command_buffer *cb, +void evergreen_init_common_regs(struct r600_context *ctx, + struct r600_command_buffer *cb, enum chip_class ctx_chip_class, enum radeon_family ctx_family, int ctx_drm_minor); @@ -639,7 +643,7 @@ void evergreen_init_color_surface(struct r600_context *rctx, void evergreen_init_color_surface_rat(struct r600_context *rctx, struct r600_surface *surf); void evergreen_update_db_shader_control(struct r600_context * rctx); - +bool evergreen_adjust_gprs(struct r600_context *rctx); /* r600_blit.c */ void r600_init_blit_functions(struct r600_context *rctx); void r600_decompress_depth_textures(struct r600_context *rctx, diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 1a18e695fa8..6a666343b06 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1624,6 +1624,13 @@ static bool r600_update_derived_state(struct r600_context *rctx) } } + if (rctx->b.chip_class == EVERGREEN) { + if (!evergreen_adjust_gprs(rctx)) { + /* discard rendering */ + return false; + } + } + blend_disable = (rctx->dual_src_blend && rctx->ps_shader->current->nr_ps_color_outputs < 2); -- 2.30.2