+
+/**
+ * Bind the LDS-info constant buffer on the vertex, tess-control and
+ * tess-eval stages, in that order. Passing cb == NULL unbinds the slot.
+ */
+static void evergreen_bind_lds_info_constbuf(struct r600_context *rctx,
+					      struct pipe_constant_buffer *cb)
+{
+	static const unsigned stages[] = {
+		PIPE_SHADER_VERTEX,
+		PIPE_SHADER_TESS_CTRL,
+		PIPE_SHADER_TESS_EVAL,
+	};
+	unsigned i;
+
+	for (i = 0; i < sizeof(stages) / sizeof(stages[0]); i++)
+		rctx->b.b.set_constant_buffer(&rctx->b.b, stages[i],
+					      R600_LDS_INFO_CONST_BUFFER, cb);
+}
+
+/**
+ * Compute the LDS layout for the tessellation stages (VS, TCS, TES) and
+ * publish it, together with other non-compile-time parameters, through a
+ * constant buffer bound to the vertex, tess-control and tess-eval stages.
+ *
+ * The uploaded buffer holds 16 dwords:
+ *   [0]      input_patch_size
+ *   [1]      input_vertex_size
+ *   [2]      num_tcs_input_cp
+ *   [3]      num_tcs_output_cp
+ *   [4]      output_patch_size
+ *   [5]      output_vertex_size
+ *   [6]      output_patch0_offset
+ *   [7]      perpatch_output_offset
+ *   [8..13]  six floats copied from rctx->tess_state
+ *   [14..15] zero
+ *
+ * All sizes and offsets are in bytes, 16 bytes per input/output slot.
+ *
+ * \param rctx         context; lds_alloc and the last_* cache fields are
+ *                     updated and tess_state_dirty is cleared
+ * \param info         draw info; only vertices_per_patch is read
+ * \param num_patches  out: always written with 1
+ *
+ * Without a TES bound, lds_alloc is reset to 0 and the constant buffer slot
+ * is unbound on all three stages. Nothing is recomputed when lds_alloc is
+ * non-zero, tess_state is clean, and the LS selector, the TCS selector and
+ * the input control-point count match those of the previous computation.
+ */
+void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe_draw_info *info, unsigned *num_patches)
+{
+	struct r600_pipe_shader_selector *vs_sel = rctx->vs_shader;
+	struct r600_pipe_shader_selector *hs_sel = rctx->tcs_shader ? rctx->tcs_shader : rctx->tes_shader;
+	struct pipe_constant_buffer cb = {0};
+	const unsigned in_cp = info->vertices_per_patch;
+	unsigned in_slots, out_slots, patch_slots, out_cp;
+	unsigned in_vertex_bytes, out_vertex_bytes;
+	unsigned in_patch_bytes, out_vertices_bytes, out_patch_bytes;
+	unsigned out_base, patch_const_base, lds_bytes;
+	unsigned quad_pipes, wave_divisor, patches, waves;
+	uint32_t consts[16];
+
+	*num_patches = 1;
+
+	/* No tessellation: drop the LDS allocation and unbind the buffer. */
+	if (!rctx->tes_shader) {
+		rctx->lds_alloc = 0;
+		evergreen_bind_lds_info_constbuf(rctx, NULL);
+		return;
+	}
+
+	/* Every input of the computation is unchanged: keep the bound buffer. */
+	if (rctx->lds_alloc != 0 && !rctx->tess_state_dirty &&
+	    rctx->last_ls == vs_sel && rctx->last_tcs == hs_sel &&
+	    rctx->last_num_tcs_input_cp == in_cp)
+		return;
+
+	patches = *num_patches;
+
+	in_slots = util_last_bit64(vs_sel->lds_outputs_written_mask);
+	if (rctx->tcs_shader) {
+		out_slots = util_last_bit64(hs_sel->lds_outputs_written_mask);
+		out_cp = hs_sel->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+		patch_slots = util_last_bit64(hs_sel->lds_patch_outputs_written_mask);
+	} else {
+		/* No TCS bound: outputs mirror the inputs. */
+		out_slots = in_slots;
+		out_cp = in_cp;
+		patch_slots = 2; /* TESSINNER + TESSOUTER */
+	}
+
+	/* Sizes in bytes, 16 bytes per slot. */
+	in_vertex_bytes = in_slots * 16;
+	out_vertex_bytes = out_slots * 16;
+	in_patch_bytes = in_cp * in_vertex_bytes;
+	out_vertices_bytes = out_cp * out_vertex_bytes;
+	out_patch_bytes = out_vertices_bytes + patch_slots * 16;
+
+	/* With a TCS bound the output patches are placed after the input patches. */
+	if (rctx->tcs_shader)
+		out_base = in_patch_bytes * patches;
+	else
+		out_base = 0;
+	patch_const_base = out_base + out_vertices_bytes;
+	lds_bytes = out_base + out_patch_bytes * patches;
+
+	consts[0] = in_patch_bytes;
+	consts[1] = in_vertex_bytes;
+	consts[2] = in_cp;
+	consts[3] = out_cp;
+	consts[4] = out_patch_bytes;
+	consts[5] = out_vertex_bytes;
+	consts[6] = out_base;
+	consts[7] = patch_const_base;
+	memcpy(&consts[8], rctx->tess_state, 6 * sizeof(float));
+	consts[14] = 0;
+	consts[15] = 0;
+
+	/* docs say HS_NUM_WAVES - CEIL((LS_HS_CONFIG.NUM_PATCHES *
+	   LS_HS_CONFIG.HS_NUM_OUTPUT_CP) / (NUM_GOOD_PIPES * 16)) */
+	quad_pipes = rctx->screen->b.info.r600_max_quad_pipes;
+	wave_divisor = 16 * quad_pipes;
+	waves = ceilf((float)(patches * out_cp) / (float)wave_divisor);
+
+	/* LDS size in the low bits, wave count from bit 14 upwards. */
+	rctx->lds_alloc = lds_bytes | (waves << 14);
+
+	/* Remember what this result was derived from for the early-out above. */
+	rctx->tess_state_dirty = false;
+	rctx->last_ls = vs_sel;
+	rctx->last_tcs = hs_sel;
+	rctx->last_num_tcs_input_cp = in_cp;
+
+	cb.user_buffer = consts;
+	cb.buffer_size = sizeof(consts);
+	evergreen_bind_lds_info_constbuf(rctx, &cb);
+	pipe_resource_reference(&cb.buffer, NULL);
+}
+
+/**
+ * Build the value for VGT_LS_HS_CONFIG for the current draw.
+ *
+ * \param rctx         context; tes_shader and tcs_shader are inspected
+ * \param info         draw info; vertices_per_patch gives the input CP count
+ * \param num_patches  patch count to encode in NUM_PATCHES
+ * \return 0 when no TES is bound, otherwise the packed NUM_PATCHES /
+ *         HS_NUM_INPUT_CP / HS_NUM_OUTPUT_CP fields. The output control
+ *         point count comes from the TCS's TCS_VERTICES_OUT property, or
+ *         equals the input count when no TCS is bound.
+ */
+uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
+				     const struct pipe_draw_info *info,
+				     unsigned num_patches)
+{
+	unsigned out_cp;
+
+	if (!rctx->tes_shader)
+		return 0;
+
+	if (rctx->tcs_shader)
+		out_cp = rctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+	else
+		out_cp = info->vertices_per_patch;
+
+	return S_028B58_NUM_PATCHES(num_patches) |
+	       S_028B58_HS_NUM_INPUT_CP(info->vertices_per_patch) |
+	       S_028B58_HS_NUM_OUTPUT_CP(out_cp);
+}
+
+/**
+ * Write ls_hs_config to the R_028B58_VGT_LS_HS_CONFIG context register via
+ * radeon_set_context_reg() on the given command stream.
+ *
+ * \param rctx          not used by this function
+ * \param cs            command stream handed to radeon_set_context_reg()
+ * \param ls_hs_config  register value to write
+ */
+void evergreen_set_ls_hs_config(struct r600_context *rctx,
+				 struct radeon_winsys_cs *cs,
+				 uint32_t ls_hs_config)
+{
+	const unsigned reg = R_028B58_VGT_LS_HS_CONFIG;
+
+	(void)rctx;
+	radeon_set_context_reg(cs, reg, ls_hs_config);
+}
+
+/**
+ * Write lds_alloc to the R_0288E8_SQ_LDS_ALLOC context register via
+ * radeon_set_context_reg() on the given command stream.
+ *
+ * \param rctx       not used by this function
+ * \param cs         command stream handed to radeon_set_context_reg()
+ * \param lds_alloc  register value to write
+ */
+void evergreen_set_lds_alloc(struct r600_context *rctx,
+			      struct radeon_winsys_cs *cs,
+			      uint32_t lds_alloc)
+{
+	const unsigned reg = R_0288E8_SQ_LDS_ALLOC;
+
+	(void)rctx;
+	radeon_set_context_reg(cs, reg, lds_alloc);
+}
+
+/**
+ * On evergreen, running tessellation requires dynamic GPRs to be disabled
+ * to work around a hardware bug. This switches between the dynamic and the
+ * static GPR partitioning and, in the static case, makes sure every
+ * hardware stage has at least as many GPRs as its bound shader uses.
+ *
+ * Without an HS shader bound, dynamic GPRs are (re-)enabled if they were
+ * off and the function returns true. With an HS shader bound, dynamic GPRs
+ * are turned off; if any stage needs more GPRs than currently programmed,
+ * the split is reset to the defaults when those suffice, otherwise every
+ * non-PS stage gets exactly what it needs and PS receives the remainder.
+ *
+ * Whenever config_state changes, its atom is marked dirty and
+ * R600_CONTEXT_WAIT_3D_IDLE is added to the context flags.
+ *
+ * \return false when the bound shaders together need more GPRs than the
+ *         sum of the per-stage defaults (nothing is modified in that
+ *         case), true otherwise.
+ */
+bool evergreen_adjust_gprs(struct r600_context *rctx)
+{
+	const unsigned clause_temps = rctx->r6xx_num_clause_temp_gprs;
+	unsigned defaults[EG_NUM_HW_STAGES];
+	unsigned wanted[EG_NUM_HW_STAGES];
+	unsigned current[EG_NUM_HW_STAGES];
+	unsigned mgmt_1, mgmt_2, mgmt_3;
+	unsigned budget = 0, demand = 0;
+	unsigned stage;
+	bool grow = false, dirty = false;
+
+	/* if we have no TESS, make sure dyn gpr is enabled and do nothing else. */
+	if (!rctx->hw_shader_stages[EG_HW_STAGE_HS].shader) {
+		if (!rctx->config_state.dyn_gpr_enabled) {
+			/* transition back to dyn gpr enabled state */
+			rctx->config_state.dyn_gpr_enabled = true;
+			r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
+			rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
+		}
+		return true;
+	}
+
+	/* The pool available to shaders is the sum of the per-stage defaults;
+	 * the clause temporaries are accounted for separately. */
+	for (stage = 0; stage < EG_NUM_HW_STAGES; stage++) {
+		defaults[stage] = rctx->default_gprs[stage];
+		budget += defaults[stage];
+
+		/* gather required shader gprs */
+		if (rctx->hw_shader_stages[stage].shader)
+			wanted[stage] = rctx->hw_shader_stages[stage].shader->shader.bc.ngpr;
+		else
+			wanted[stage] = 0;
+		demand += wanted[stage];
+	}
+
+	if (demand > budget)
+		return false;
+
+	/* Decode the partitioning that is currently programmed. */
+	mgmt_1 = rctx->config_state.sq_gpr_resource_mgmt_1;
+	mgmt_2 = rctx->config_state.sq_gpr_resource_mgmt_2;
+	mgmt_3 = rctx->config_state.sq_gpr_resource_mgmt_3;
+	current[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(mgmt_1);
+	current[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(mgmt_1);
+	current[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(mgmt_2);
+	current[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(mgmt_2);
+	current[EG_HW_STAGE_LS] = G_008C0C_NUM_LS_GPRS(mgmt_3);
+	current[EG_HW_STAGE_HS] = G_008C0C_NUM_HS_GPRS(mgmt_3);
+
+	for (stage = 0; stage < EG_NUM_HW_STAGES && !grow; stage++)
+		grow = wanted[stage] > current[stage];
+
+	if (rctx->config_state.dyn_gpr_enabled) {
+		rctx->config_state.dyn_gpr_enabled = false;
+		dirty = true;
+	}
+
+	if (grow) {
+		bool defaults_suffice = true;
+
+		for (stage = 0; stage < EG_NUM_HW_STAGES; stage++) {
+			if (wanted[stage] > defaults[stage])
+				defaults_suffice = false;
+		}
+
+		if (defaults_suffice) {
+			for (stage = 0; stage < EG_NUM_HW_STAGES; stage++)
+				wanted[stage] = defaults[stage];
+		} else {
+			/* PS takes whatever the other stages leave over. */
+			unsigned ps_share = budget;
+
+			for (stage = R600_HW_STAGE_VS; stage < EG_NUM_HW_STAGES; stage++)
+				ps_share -= wanted[stage];
+			wanted[R600_HW_STAGE_PS] = ps_share;
+		}
+
+		mgmt_1 = S_008C04_NUM_PS_GPRS(wanted[R600_HW_STAGE_PS]) |
+			 S_008C04_NUM_VS_GPRS(wanted[R600_HW_STAGE_VS]) |
+			 S_008C04_NUM_CLAUSE_TEMP_GPRS(clause_temps);
+		mgmt_2 = S_008C08_NUM_ES_GPRS(wanted[R600_HW_STAGE_ES]) |
+			 S_008C08_NUM_GS_GPRS(wanted[R600_HW_STAGE_GS]);
+		mgmt_3 = S_008C0C_NUM_HS_GPRS(wanted[EG_HW_STAGE_HS]) |
+			 S_008C0C_NUM_LS_GPRS(wanted[EG_HW_STAGE_LS]);
+
+		if (rctx->config_state.sq_gpr_resource_mgmt_1 != mgmt_1 ||
+		    rctx->config_state.sq_gpr_resource_mgmt_2 != mgmt_2 ||
+		    rctx->config_state.sq_gpr_resource_mgmt_3 != mgmt_3) {
+			rctx->config_state.sq_gpr_resource_mgmt_1 = mgmt_1;
+			rctx->config_state.sq_gpr_resource_mgmt_2 = mgmt_2;
+			rctx->config_state.sq_gpr_resource_mgmt_3 = mgmt_3;
+			dirty = true;
+		}
+	}
+
+	if (dirty) {
+		r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
+		rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
+	}
+	return true;
+}