From 931a8d0c9a15df462f14ab40f9ae31c8ecf75376 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 28 Mar 2017 06:13:09 +1000 Subject: [PATCH] radv: rework vertex/export shader output handling In order to facilitate adding tess support, split the vs/es output info into a separate block, so we make it easier to have the tess shaders export the same info. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/common/ac_nir_to_llvm.c | 54 ++++++++++++++------------- src/amd/common/ac_nir_to_llvm.h | 30 +++++++++------ src/amd/vulkan/radv_cmd_buffer.c | 64 +++++++++++++++++--------------- src/amd/vulkan/radv_pipeline.c | 8 ++-- 4 files changed, 86 insertions(+), 70 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 6e36c192c3c..cfbdeae1a3b 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -4228,11 +4228,11 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx, int length = glsl_get_length(variable->type); if (idx == VARYING_SLOT_CLIP_DIST0) { if (ctx->stage == MESA_SHADER_VERTEX) - ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1; + ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << length) - 1; ctx->num_output_clips = length; } else if (idx == VARYING_SLOT_CULL_DIST0) { if (ctx->stage == MESA_SHADER_VERTEX) - ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1; + ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << length) - 1; ctx->num_output_culls = length; } if (length > 4) @@ -4448,7 +4448,8 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, } static void -handle_vs_outputs_post(struct nir_to_llvm_context *ctx) +handle_vs_outputs_post(struct nir_to_llvm_context *ctx, + struct ac_vs_output_info *outinfo) { uint32_t param_count = 0; unsigned target; @@ -4461,14 +4462,14 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) (1ull << VARYING_SLOT_CULL_DIST0) | (1ull << VARYING_SLOT_CULL_DIST1)); - ctx->shader_info->vs.prim_id_output = 
0xffffffff; - ctx->shader_info->vs.layer_output = 0xffffffff; + outinfo->prim_id_output = 0xffffffff; + outinfo->layer_output = 0xffffffff; if (clip_mask) { LLVMValueRef slots[8]; unsigned j; - if (ctx->shader_info->vs.cull_dist_mask) - ctx->shader_info->vs.cull_dist_mask <<= ctx->num_output_clips; + if (outinfo->cull_dist_mask) + outinfo->cull_dist_mask <<= ctx->num_output_clips; i = VARYING_SLOT_CLIP_DIST0; for (j = 0; j < ctx->num_output_clips; j++) @@ -4513,25 +4514,25 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) i == VARYING_SLOT_CULL_DIST1) { continue; } else if (i == VARYING_SLOT_PSIZ) { - ctx->shader_info->vs.writes_pointsize = true; + outinfo->writes_pointsize = true; psize_value = values[0]; continue; } else if (i == VARYING_SLOT_LAYER) { - ctx->shader_info->vs.writes_layer = true; + outinfo->writes_layer = true; layer_value = values[0]; - ctx->shader_info->vs.layer_output = param_count; + outinfo->layer_output = param_count; target = V_008DFC_SQ_EXP_PARAM + param_count; param_count++; } else if (i == VARYING_SLOT_VIEWPORT) { - ctx->shader_info->vs.writes_viewport_index = true; + outinfo->writes_viewport_index = true; viewport_index_value = values[0]; continue; } else if (i == VARYING_SLOT_PRIMITIVE_ID) { - ctx->shader_info->vs.prim_id_output = param_count; + outinfo->prim_id_output = param_count; target = V_008DFC_SQ_EXP_PARAM + param_count; param_count++; } else if (i >= VARYING_SLOT_VAR0) { - ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0); + outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0); target = V_008DFC_SQ_EXP_PARAM + param_count; param_count++; } @@ -4560,9 +4561,9 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) pos_args[0].out[3] = ctx->f32one; /* W */ } - uint32_t mask = ((ctx->shader_info->vs.writes_pointsize == true ? 1 : 0) | - (ctx->shader_info->vs.writes_layer == true ? 4 : 0) | - (ctx->shader_info->vs.writes_viewport_index == true ? 
8 : 0)); + uint32_t mask = ((outinfo->writes_pointsize == true ? 1 : 0) | + (outinfo->writes_layer == true ? 4 : 0) | + (outinfo->writes_viewport_index == true ? 8 : 0)); if (mask) { pos_args[1].enabled_channels = mask; pos_args[1].valid_mask = 0; @@ -4574,11 +4575,11 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) pos_args[1].out[2] = ctx->f32zero; /* Z */ pos_args[1].out[3] = ctx->f32zero; /* W */ - if (ctx->shader_info->vs.writes_pointsize == true) + if (outinfo->writes_pointsize == true) pos_args[1].out[0] = psize_value; - if (ctx->shader_info->vs.writes_layer == true) + if (outinfo->writes_layer == true) pos_args[1].out[2] = layer_value; - if (ctx->shader_info->vs.writes_viewport_index == true) + if (outinfo->writes_viewport_index == true) pos_args[1].out[3] = viewport_index_value; } for (i = 0; i < 4; i++) { @@ -4598,12 +4599,13 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) ac_build_export(&ctx->ac, &pos_args[i]); } - ctx->shader_info->vs.pos_exports = num_pos_exports; - ctx->shader_info->vs.param_exports = param_count; + outinfo->pos_exports = num_pos_exports; + outinfo->param_exports = param_count; } static void -handle_es_outputs_post(struct nir_to_llvm_context *ctx) +handle_es_outputs_post(struct nir_to_llvm_context *ctx, + struct ac_es_output_info *outinfo) { int j; uint64_t max_output_written = 0; @@ -4638,7 +4640,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx) 1, 1, true, true); } } - ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16; + outinfo->esgs_itemsize = (max_output_written + 1) * 16; } static void @@ -4761,9 +4763,9 @@ handle_shader_outputs_post(struct nir_to_llvm_context *ctx) switch (ctx->stage) { case MESA_SHADER_VERTEX: if (ctx->options->key.vs.as_es) - handle_es_outputs_post(ctx); + handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info); else - handle_vs_outputs_post(ctx); + handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo); break; case MESA_SHADER_FRAGMENT: 
handle_fs_outputs_post(ctx); @@ -5170,7 +5172,7 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) } idx += slot_inc; } - handle_vs_outputs_post(ctx); + handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo); } void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h index b4c4a73a1c1..15afbd77456 100644 --- a/src/amd/common/ac_nir_to_llvm.h +++ b/src/amd/common/ac_nir_to_llvm.h @@ -91,6 +91,23 @@ struct ac_userdata_locations { struct ac_userdata_info shader_data[AC_UD_MAX_UD]; }; +struct ac_vs_output_info { + uint8_t clip_dist_mask; + uint8_t cull_dist_mask; + bool writes_pointsize; + bool writes_layer; + bool writes_viewport_index; + uint32_t prim_id_output; + uint32_t layer_output; + uint32_t export_mask; + unsigned param_exports; + unsigned pos_exports; +}; + +struct ac_es_output_info { + uint32_t esgs_itemsize; +}; + struct ac_shader_variant_info { struct ac_userdata_locations user_sgprs_locs; unsigned num_user_sgprs; @@ -98,19 +115,10 @@ struct ac_shader_variant_info { unsigned num_input_vgprs; union { struct { - unsigned param_exports; - unsigned pos_exports; + struct ac_vs_output_info outinfo; + struct ac_es_output_info es_info; unsigned vgpr_comp_cnt; - uint32_t export_mask; - bool writes_pointsize; - bool writes_layer; - bool writes_viewport_index; bool as_es; - uint8_t clip_dist_mask; - uint8_t cull_dist_mask; - uint32_t esgs_itemsize; - uint32_t prim_id_output; - uint32_t layer_output; } vs; struct { unsigned num_interp; diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index eb2a7b0ddea..ce34204b8ac 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -502,7 +502,8 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer, static void radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline, - struct radv_shader_variant *shader) + struct radv_shader_variant 
*shader, + struct ac_vs_output_info *outinfo) { struct radeon_winsys *ws = cmd_buffer->device->ws; uint64_t va = ws->buffer_get_va(shader->bo); @@ -510,19 +511,19 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8); - export_count = MAX2(1, shader->info.vs.param_exports); + export_count = MAX2(1, outinfo->param_exports); radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(export_count - 1)); radeon_set_context_reg(cmd_buffer->cs, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS1_EXPORT_FORMAT(shader->info.vs.pos_exports > 1 ? + S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS2_EXPORT_FORMAT(shader->info.vs.pos_exports > 2 ? + S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS3_EXPORT_FORMAT(shader->info.vs.pos_exports > 3 ? + S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? 
V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE)); @@ -540,17 +541,17 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); unsigned clip_dist_mask, cull_dist_mask, total_mask; - clip_dist_mask = shader->info.vs.clip_dist_mask; - cull_dist_mask = shader->info.vs.cull_dist_mask; + clip_dist_mask = outinfo->clip_dist_mask; + cull_dist_mask = outinfo->cull_dist_mask; total_mask = clip_dist_mask | cull_dist_mask; radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL, - S_02881C_USE_VTX_POINT_SIZE(shader->info.vs.writes_pointsize) | - S_02881C_USE_VTX_RENDER_TARGET_INDX(shader->info.vs.writes_layer) | - S_02881C_USE_VTX_VIEWPORT_INDX(shader->info.vs.writes_viewport_index) | - S_02881C_VS_OUT_MISC_VEC_ENA(shader->info.vs.writes_pointsize || - shader->info.vs.writes_layer || - shader->info.vs.writes_viewport_index) | + S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | + S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | + S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | + S_02881C_VS_OUT_MISC_VEC_ENA(outinfo->writes_pointsize || + outinfo->writes_layer || + outinfo->writes_viewport_index) | S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | pipeline->graphics.raster.pa_cl_vs_out_cntl | @@ -558,12 +559,13 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, clip_dist_mask); radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF, - S_028AB4_REUSE_OFF(shader->info.vs.writes_viewport_index)); + S_028AB4_REUSE_OFF(outinfo->writes_viewport_index)); } static void radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, - struct radv_shader_variant *shader) + struct radv_shader_variant *shader, + struct ac_es_output_info *outinfo) { struct radeon_winsys *ws = cmd_buffer->device->ws; uint64_t va = ws->buffer_get_va(shader->bo); @@ -571,7 +573,7 @@ radv_emit_hw_es(struct radv_cmd_buffer 
*cmd_buffer, ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8); radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, - shader->info.vs.esgs_itemsize / 4); + outinfo->esgs_itemsize / 4); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4); radeon_emit(cmd_buffer->cs, va >> 8); radeon_emit(cmd_buffer->cs, va >> 40); @@ -590,9 +592,9 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer, vs = pipeline->shaders[MESA_SHADER_VERTEX]; if (vs->info.vs.as_es) - radv_emit_hw_es(cmd_buffer, vs); + radv_emit_hw_es(cmd_buffer, vs, &vs->info.vs.es_info); else - radv_emit_hw_vs(cmd_buffer, pipeline, vs); + radv_emit_hw_vs(cmd_buffer, pipeline, vs, &vs->info.vs.outinfo); radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0); } @@ -666,7 +668,7 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cmd_buffer->cs, gs->rsrc1); radeon_emit(cmd_buffer->cs, gs->rsrc2); - radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader); + radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader, &pipeline->gs_copy_shader->info.vs.outinfo); struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY, AC_UD_GS_VS_RING_STRIDE_ENTRIES); @@ -696,10 +698,14 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_blend_state *blend = &pipeline->graphics.blend; unsigned ps_offset = 0; unsigned z_order; + struct ac_vs_output_info *outinfo; assert (pipeline->shaders[MESA_SHADER_FRAGMENT]); ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; vs = radv_pipeline_has_gs(pipeline) ? 
pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_VERTEX]; + + outinfo = &vs->info.vs.outinfo; + va = ws->buffer_get_va(ps->bo); ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8); @@ -757,20 +763,20 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, ps_offset++; } - if (ps->info.fs.prim_id_input && (vs->info.vs.prim_id_output != 0xffffffff)) { + if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) { unsigned vs_offset, flat_shade; unsigned val; - vs_offset = vs->info.vs.prim_id_output; + vs_offset = outinfo->prim_id_output; flat_shade = true; val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val); ++ps_offset; } - if (ps->info.fs.layer_input && (vs->info.vs.layer_output != 0xffffffff)) { + if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) { unsigned vs_offset, flat_shade; unsigned val; - vs_offset = vs->info.vs.layer_output; + vs_offset = outinfo->layer_output; flat_shade = true; val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val); @@ -785,20 +791,20 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, continue; - if (!(vs->info.vs.export_mask & (1u << i))) { + if (!(outinfo->export_mask & (1u << i))) { radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, S_028644_OFFSET(0x20)); ++ps_offset; continue; } - vs_offset = util_bitcount(vs->info.vs.export_mask & ((1u << i) - 1)); - if (vs->info.vs.prim_id_output != 0xffffffff) { - if (vs_offset >= vs->info.vs.prim_id_output) + vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1)); + if (outinfo->prim_id_output != 0xffffffff) { + if (vs_offset >= outinfo->prim_id_output) vs_offset++; } - if (vs->info.vs.layer_output != 0xffffffff) { - if (vs_offset >= vs->info.vs.layer_output) + if (outinfo->layer_output 
!= 0xffffffff) { + if (vs_offset >= outinfo->layer_output) vs_offset++; } flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset)); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 1becb65055b..2c710f4eb8f 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1467,15 +1467,15 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline) unsigned alignment = 256 * num_se; /* The maximum size is 63.999 MB per SE. */ unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se; - + struct ac_es_output_info *es_info = &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info; struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info; - struct ac_shader_variant_info *es_info = &pipeline->shaders[MESA_SHADER_VERTEX]->info; + /* Calculate the minimum size. */ - unsigned min_esgs_ring_size = align(es_info->vs.esgs_itemsize * gs_vertex_reuse * + unsigned min_esgs_ring_size = align(es_info->esgs_itemsize * gs_vertex_reuse * wave_size, alignment); /* These are recommended sizes, not minimum sizes. */ unsigned esgs_ring_size = max_gs_waves * 2 * wave_size * - es_info->vs.esgs_itemsize * gs_info->gs.vertices_in; + es_info->esgs_itemsize * gs_info->gs.vertices_in; unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs_info->gs.max_gsvs_emit_size * 1; // no streams in VK (gs->max_gs_stream + 1); -- 2.30.2