From 470952f751d1327831c638ee369b7f0f2e20e6fb Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 26 Oct 2012 18:59:05 -0400 Subject: [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu v2 On r6xx/r7xx shader resource management need to make sure that the shader does not goes over the gpr register limit. Each specific asic has a maxmimum register that can be split btw shader stage. For each stage the shader must not use more register than the limit programmed. v2: Print an error message when discarding draw. Don't add another boolean to context structure, but rather propagate the discard boolean through the call chain. Signed-off-by: Jerome Glisse --- src/gallium/drivers/r600/r600_pipe.h | 2 +- src/gallium/drivers/r600/r600_state.c | 67 ++++++++++++++------ src/gallium/drivers/r600/r600_state_common.c | 27 ++++---- 3 files changed, 62 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 238ab1676f4..342ab87052d 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -620,7 +620,7 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx); void *r600_create_resolve_blend(struct r600_context *rctx); void *r700_create_resolve_blend(struct r600_context *rctx); void *r600_create_decompress_blend(struct r600_context *rctx); -void r600_adjust_gprs(struct r600_context *rctx); +bool r600_adjust_gprs(struct r600_context *rctx); boolean r600_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index f3af5687258..5c52f3d518e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2186,36 +2186,61 @@ void r600_init_state_functions(struct r600_context *rctx) } /* Adjust GPR allocation on R6xx/R7xx */ -void r600_adjust_gprs(struct r600_context *rctx) +bool r600_adjust_gprs(struct r600_context *rctx) { - unsigned num_ps_gprs = rctx->default_ps_gprs; - unsigned num_vs_gprs = rctx->default_vs_gprs; + unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; + unsigned num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; + unsigned new_num_ps_gprs = num_ps_gprs; + unsigned new_num_vs_gprs = num_vs_gprs; + unsigned cur_num_ps_gprs = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); + unsigned cur_num_vs_gprs = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); + unsigned def_num_ps_gprs = rctx->default_ps_gprs; + unsigned def_num_vs_gprs = rctx->default_vs_gprs; + unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; + /* hardware will reserve twice num_clause_temp_gprs */ + unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + def_num_clause_temp_gprs * 2; unsigned tmp; - int diff; - if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) { - diff = rctx->ps_shader->current->shader.bc.ngpr - rctx->default_ps_gprs; - num_vs_gprs -= diff; - num_ps_gprs += diff; - } - - if (rctx->vs_shader->current->shader.bc.ngpr > rctx->default_vs_gprs) - { - diff = rctx->vs_shader->current->shader.bc.ngpr - rctx->default_vs_gprs; - num_ps_gprs -= diff; - num_vs_gprs += diff; + /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs */ + if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > cur_num_vs_gprs) { + /* try to use switch back to default */ + if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > def_num_vs_gprs) { + /* always privilege vs stage so that at worst we have the + * pixel stage producing wrong output (not the vertex + * stage) */ + new_num_ps_gprs = max_gprs - (new_num_vs_gprs + def_num_clause_temp_gprs * 2); + new_num_vs_gprs = num_vs_gprs; + } else { + new_num_ps_gprs = def_num_ps_gprs; + new_num_vs_gprs = def_num_vs_gprs; + } + } else { + return true; } - tmp = 0; - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs); - - if (tmp != rctx->config_state.sq_gpr_resource_mgmt_1) { + /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= + * SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup + * Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS + * it will lockup. So in this case just discard the draw command + * and don't change the current gprs repartitions. + */ + if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs) { + R600_ERR("ps & vs shader require too many register (%d + %d) " + "for a combined maximum of %d\n", + num_ps_gprs, num_vs_gprs, max_gprs); + return false; + } + + /* in some case we endup recomputing the current value */ + tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) | + S_008C04_NUM_VS_GPRS(new_num_vs_gprs) | + S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs); + if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) { rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; rctx->config_state.atom.dirty = true; rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; } + return true; } void r600_init_atom_start_cs(struct r600_context *rctx) diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 3503c3964a6..7cd84bcac81 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -756,10 +756,6 @@ static int r600_shader_select(struct pipe_context *ctx, shader->next_variant = sel->current; sel->current = shader; - if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) { - r600_adjust_gprs(rctx); - } - if (rctx->ps_shader && rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) { rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs; @@ -815,9 +811,6 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state) rctx->cb_misc_state.multiwrite = multiwrite; rctx->cb_misc_state.atom.dirty = true; } - - if (rctx->vs_shader) - r600_adjust_gprs(rctx); } if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) { @@ -840,9 +833,6 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state) if (state) { r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate); - if (rctx->chip_class < EVERGREEN && rctx->ps_shader) - r600_adjust_gprs(rctx); - /* Update clip misc state. */ if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl || rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) { @@ -1033,7 +1023,7 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask rctx->sample_mask.atom.dirty = true; } -static void r600_update_derived_state(struct r600_context *rctx) +static bool r600_update_derived_state(struct r600_context *rctx) { struct pipe_context * ctx = (struct pipe_context*)rctx; unsigned ps_dirty = 0; @@ -1071,6 +1061,13 @@ static void r600_update_derived_state(struct r600_context *rctx) if (ps_dirty) r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate); + if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) { + if (!r600_adjust_gprs(rctx)) { + /* discard rendering */ + return false; + } + } + blend_disable = (rctx->dual_src_blend && rctx->ps_shader->current->nr_ps_color_outputs < 2); @@ -1080,6 +1077,7 @@ static void r600_update_derived_state(struct r600_context *rctx) rctx->blend_state.cso, blend_disable); } + return true; } static unsigned r600_conv_prim_to_gs_out(unsigned mode) @@ -1138,7 +1136,12 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info return; } - r600_update_derived_state(rctx); + if (!r600_update_derived_state(rctx)) { + /* useless to render because current rendering command + * can't be achieved + */ + return; + } if (info.indexed) { /* Initialize the index buffer struct. */ -- 2.30.2