From bb2b8778cbf7ca6263bf1540708900620a56ace4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 30 Nov 2015 13:15:57 +1000 Subject: [PATCH] r600: make adjust_gprs use hw stages. This changes the r600-specific GPR adjustment code to use the hardware stage defines and per-stage arrays. This is prep work for the later tessellation changes. Reviewed-by: Oded Gabbay Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_pipe.h | 2 +- src/gallium/drivers/r600/r600_state.c | 117 ++++++++++++++------------ 2 files changed, 64 insertions(+), 55 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 5b5eaacf683..e248b372f03 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -432,7 +432,7 @@ struct r600_context { /* Hardware info. */ boolean has_vertex_cache; boolean keep_tiling_flags; - unsigned default_ps_gprs, default_vs_gprs; + unsigned default_gprs[EG_NUM_HW_STAGES]; unsigned r6xx_num_clause_temp_gprs; /* Miscellaneous state objects. 
*/ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index b11cfeab17e..c7e0007e600 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2044,57 +2044,62 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) /* Adjust GPR allocation on R6xx/R7xx */ bool r600_adjust_gprs(struct r600_context *rctx) { - unsigned num_ps_gprs = rctx->ps_shader->current->shader.bc.ngpr; - unsigned num_vs_gprs, num_es_gprs, num_gs_gprs; - unsigned new_num_ps_gprs = num_ps_gprs; - unsigned new_num_vs_gprs, new_num_es_gprs, new_num_gs_gprs; - unsigned cur_num_ps_gprs = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); - unsigned cur_num_vs_gprs = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); - unsigned cur_num_gs_gprs = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); - unsigned cur_num_es_gprs = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); - unsigned def_num_ps_gprs = rctx->default_ps_gprs; - unsigned def_num_vs_gprs = rctx->default_vs_gprs; - unsigned def_num_gs_gprs = 0; - unsigned def_num_es_gprs = 0; + unsigned num_gprs[R600_NUM_HW_STAGES]; + unsigned new_gprs[R600_NUM_HW_STAGES]; + unsigned cur_gprs[R600_NUM_HW_STAGES]; + unsigned def_gprs[R600_NUM_HW_STAGES]; unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs; - /* hardware will reserve twice num_clause_temp_gprs */ - unsigned max_gprs = def_num_gs_gprs + def_num_es_gprs + def_num_ps_gprs + def_num_vs_gprs + def_num_clause_temp_gprs * 2; + unsigned max_gprs; unsigned tmp, tmp2; + unsigned i; + bool need_recalc = false, use_default = true; + + /* hardware will reserve twice num_clause_temp_gprs */ + max_gprs = def_num_clause_temp_gprs * 2; + for (i = 0; i < R600_NUM_HW_STAGES; i++) { + def_gprs[i] = rctx->default_gprs[i]; + max_gprs += def_gprs[i]; + } + cur_gprs[R600_HW_STAGE_PS] = 
G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); + cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1); + cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); + cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2); + + num_gprs[R600_HW_STAGE_PS] = rctx->ps_shader->current->shader.bc.ngpr; if (rctx->gs_shader) { - num_es_gprs = rctx->vs_shader->current->shader.bc.ngpr; - num_gs_gprs = rctx->gs_shader->current->shader.bc.ngpr; - num_vs_gprs = rctx->gs_shader->current->gs_copy_shader->shader.bc.ngpr; + num_gprs[R600_HW_STAGE_ES] = rctx->vs_shader->current->shader.bc.ngpr; + num_gprs[R600_HW_STAGE_GS] = rctx->gs_shader->current->shader.bc.ngpr; + num_gprs[R600_HW_STAGE_VS] = rctx->gs_shader->current->gs_copy_shader->shader.bc.ngpr; } else { - num_es_gprs = 0; - num_gs_gprs = 0; - num_vs_gprs = rctx->vs_shader->current->shader.bc.ngpr; + num_gprs[R600_HW_STAGE_ES] = 0; + num_gprs[R600_HW_STAGE_GS] = 0; + num_gprs[R600_HW_STAGE_VS] = rctx->vs_shader->current->shader.bc.ngpr; + } + + for (i = 0; i < R600_NUM_HW_STAGES; i++) { + new_gprs[i] = num_gprs[i]; + if (new_gprs[i] > cur_gprs[i]) + need_recalc = true; + if (new_gprs[i] > def_gprs[i]) + use_default = false; } - new_num_vs_gprs = num_vs_gprs; - new_num_es_gprs = num_es_gprs; - new_num_gs_gprs = num_gs_gprs; /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must <= to max_gprs */ - if (new_num_ps_gprs > cur_num_ps_gprs || new_num_vs_gprs > cur_num_vs_gprs || - new_num_es_gprs > cur_num_es_gprs || new_num_gs_gprs > cur_num_gs_gprs) { - /* try to use switch back to default */ - if (new_num_ps_gprs > def_num_ps_gprs || new_num_vs_gprs > def_num_vs_gprs || - new_num_gs_gprs > def_num_gs_gprs || new_num_es_gprs > def_num_es_gprs) { - /* always privilege vs stage so that at worst we have the - * pixel stage producing wrong output (not the vertex - * stage) */ - new_num_ps_gprs = max_gprs 
- ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2); - new_num_vs_gprs = num_vs_gprs; - new_num_gs_gprs = num_gs_gprs; - new_num_es_gprs = num_es_gprs; - } else { - new_num_ps_gprs = def_num_ps_gprs; - new_num_vs_gprs = def_num_vs_gprs; - new_num_es_gprs = def_num_es_gprs; - new_num_gs_gprs = def_num_gs_gprs; - } - } else { + if (!need_recalc) return true; + + /* try to use switch back to default */ + if (!use_default) { + /* always privilege vs stage so that at worst we have the + * pixel stage producing wrong output (not the vertex + * stage) */ + new_gprs[R600_HW_STAGE_PS] = max_gprs - def_num_clause_temp_gprs * 2; + for (i = R600_HW_STAGE_VS; i < R600_NUM_HW_STAGES; i++) + new_gprs[R600_HW_STAGE_PS] -= new_gprs[i]; + } else { + for (i = 0; i < R600_NUM_HW_STAGES; i++) + new_gprs[i] = def_gprs[i]; } /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value <= @@ -2103,21 +2108,22 @@ bool r600_adjust_gprs(struct r600_context *rctx) * it will lockup. So in this case just discard the draw command * and don't change the current gprs repartitions. 
*/ - if (num_ps_gprs > new_num_ps_gprs || num_vs_gprs > new_num_vs_gprs || - num_gs_gprs > new_num_gs_gprs || num_es_gprs > new_num_es_gprs) { - R600_ERR("shaders require too many register (%d + %d + %d + %d) " - "for a combined maximum of %d\n", - num_ps_gprs, num_vs_gprs, num_es_gprs, num_gs_gprs, max_gprs); - return false; + for (i = 0; i < R600_NUM_HW_STAGES; i++) { + if (num_gprs[i] > new_gprs[i]) { + R600_ERR("shaders require too many register (%d + %d + %d + %d) " + "for a combined maximum of %d\n", + num_gprs[R600_HW_STAGE_PS], num_gprs[R600_HW_STAGE_VS], num_gprs[R600_HW_STAGE_ES], num_gprs[R600_HW_STAGE_GS], max_gprs); + return false; + } } /* in some case we endup recomputing the current value */ - tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) | - S_008C04_NUM_VS_GPRS(new_num_vs_gprs) | + tmp = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) | + S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) | S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs); - tmp2 = S_008C08_NUM_ES_GPRS(new_num_es_gprs) | - S_008C08_NUM_GS_GPRS(new_num_gs_gprs); + tmp2 = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) | + S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]); if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp || rctx->config_state.sq_gpr_resource_mgmt_2 != tmp2) { rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; rctx->config_state.sq_gpr_resource_mgmt_2 = tmp2; @@ -2286,8 +2292,11 @@ void r600_init_atom_start_cs(struct r600_context *rctx) break; } - rctx->default_ps_gprs = num_ps_gprs; - rctx->default_vs_gprs = num_vs_gprs; + rctx->default_gprs[R600_HW_STAGE_PS] = num_ps_gprs; + rctx->default_gprs[R600_HW_STAGE_VS] = num_vs_gprs; + rctx->default_gprs[R600_HW_STAGE_GS] = 0; + rctx->default_gprs[R600_HW_STAGE_ES] = 0; + rctx->r6xx_num_clause_temp_gprs = num_temp_gprs; /* SQ_CONFIG */ -- 2.30.2